[Lldb-commits] [lldb] da827d0 - [lldb][DataFormatter] Simplify std::unordered_map::iterator formatter (#97754)

via lldb-commits lldb-commits at lists.llvm.org
Mon Jul 8 06:39:02 PDT 2024


Author: Michael Buch
Date: 2024-07-08T14:38:58+01:00
New Revision: da827d0896e5e66fe9130f8f4479537d3bbee1da

URL: https://github.com/llvm/llvm-project/commit/da827d0896e5e66fe9130f8f4479537d3bbee1da
DIFF: https://github.com/llvm/llvm-project/commit/da827d0896e5e66fe9130f8f4479537d3bbee1da.diff

LOG: [lldb][DataFormatter] Simplify std::unordered_map::iterator formatter (#97754)

Depends on https://github.com/llvm/llvm-project/pull/97752

This patch changes the way we retrieve the key/value pair in the
`std::unordered_map::iterator` formatter (similar to how we are changing
it for `std::map::iterator` in
https://github.com/llvm/llvm-project/pull/97713, the motivations being
the same).

The old logic was not very easy to follow, and encoded the libc++ layout
in non-obvious ways. More importantly, it was fragile to alignment
miscalculations (https://github.com/llvm/llvm-project/pull/97443); this
would break once the new layout of `std::unordered_map` lands as part
of https://github.com/llvm/llvm-project/issues/93069.

Instead, this patch simply casts the `__hash_iterator` to a
`__node_pointer` (which is what libc++ does too) and uses a
straightforward `GetChildMemberWithName("__value_")` to get to the
key/value we care about.

The `std::unordered_map` formatter already does it this way, so we align
the iterator counterpart to do the same. We can eventually re-use the
core part of the `std::unordered_map` and `std::unordered_map::iterator`
formatters. But it will be an easier change to review once both
simplifications have landed.

Added: 
    

Modified: 
    lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
index f5be2f5a5c317..93e7f4f4fd86c 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
@@ -52,26 +52,6 @@ class LibcxxStdUnorderedMapSyntheticFrontEnd
   std::vector<std::pair<ValueObject *, uint64_t>> m_elements_cache;
 };
 
-/// Formats libcxx's std::unordered_map iterators
-///
-/// In raw form a std::unordered_map::iterator is represented as follows:
-///
-/// (lldb) var it --raw --ptr-depth 1
-/// (std::__1::__hash_map_iterator<
-///    std::__1::__hash_iterator<
-///      std::__1::__hash_node<
-///        std::__1::__hash_value_type<
-///            std::__1::basic_string<char, std::__1::char_traits<char>,
-///            std::__1::allocator<char> >, std::__1::basic_string<char,
-///            std::__1::char_traits<char>, std::__1::allocator<char> > >,
-///        void *> *> >)
-///  it = {
-///   __i_ = {
-///     __node_ = 0x0000600001700040 {
-///       __next_ = 0x0000600001704000
-///     }
-///   }
-/// }
 class LibCxxUnorderedMapIteratorSyntheticFrontEnd
     : public SyntheticChildrenFrontEnd {
 public:
@@ -90,9 +70,6 @@ class LibCxxUnorderedMapIteratorSyntheticFrontEnd
   size_t GetIndexOfChildWithName(ConstString name) override;
 
 private:
-  ValueObject *m_iter_ptr = nullptr; ///< Held, not owned. Child of iterator
-                                     ///< ValueObject supplied at construction.
-
   lldb::ValueObjectSP m_pair_sp; ///< ValueObject for the key/value pair
                                  ///< that the iterator currently points
                                  ///< to.
@@ -304,7 +281,6 @@ lldb_private::formatters::LibCxxUnorderedMapIteratorSyntheticFrontEnd::
 lldb::ChildCacheState lldb_private::formatters::
     LibCxxUnorderedMapIteratorSyntheticFrontEnd::Update() {
   m_pair_sp.reset();
-  m_iter_ptr = nullptr;
 
   ValueObjectSP valobj_sp = m_backend.GetSP();
   if (!valobj_sp)
@@ -315,98 +291,66 @@ lldb::ChildCacheState lldb_private::formatters::
   if (!target_sp)
     return lldb::ChildCacheState::eRefetch;
 
-  if (!valobj_sp)
+  // Get the unordered_map::iterator
+  // m_backend is an 'unordered_map::iterator', aka a
+  // '__hash_map_iterator<__hash_table::iterator>'
+  //
+  // __hash_map_iterator::__i_ is a __hash_table::iterator (aka
+  // __hash_iterator<__node_pointer>)
+  auto hash_iter_sp = valobj_sp->GetChildMemberWithName("__i_");
+  if (!hash_iter_sp)
     return lldb::ChildCacheState::eRefetch;
 
-  auto exprPathOptions = ValueObject::GetValueForExpressionPathOptions()
-                             .DontCheckDotVsArrowSyntax()
-                             .SetSyntheticChildrenTraversal(
-                                 ValueObject::GetValueForExpressionPathOptions::
-                                     SyntheticChildrenTraversal::None);
-
-  // This must be a ValueObject* because it is a child of the ValueObject we
-  // are producing children for it if were a ValueObjectSP, we would end up
-  // with a loop (iterator -> synthetic -> child -> parent == iterator) and
-  // that would in turn leak memory by never allowing the ValueObjects to die
-  // and free their memory.
-  m_iter_ptr =
-      valobj_sp
-          ->GetValueForExpressionPath(".__i_.__node_", nullptr, nullptr,
-                                      exprPathOptions, nullptr)
-          .get();
-
-  if (m_iter_ptr) {
-    auto iter_child(valobj_sp->GetChildMemberWithName("__i_"));
-    if (!iter_child) {
-      m_iter_ptr = nullptr;
-      return lldb::ChildCacheState::eRefetch;
-    }
-
-    CompilerType node_type(iter_child->GetCompilerType()
-                               .GetTypeTemplateArgument(0)
-                               .GetPointeeType());
-
-    CompilerType pair_type(node_type.GetTypeTemplateArgument(0));
-
-    std::string name;
-    uint64_t bit_offset_ptr;
-    uint32_t bitfield_bit_size_ptr;
-    bool is_bitfield_ptr;
-
-    pair_type = pair_type.GetFieldAtIndex(
-        0, name, &bit_offset_ptr, &bitfield_bit_size_ptr, &is_bitfield_ptr);
-    if (!pair_type) {
-      m_iter_ptr = nullptr;
-      return lldb::ChildCacheState::eRefetch;
-    }
+  // Type is '__hash_iterator<__node_pointer>'
+  auto hash_iter_type = hash_iter_sp->GetCompilerType();
+  if (!hash_iter_type.IsValid())
+    return lldb::ChildCacheState::eRefetch;
 
-    uint64_t addr = m_iter_ptr->GetValueAsUnsigned(LLDB_INVALID_ADDRESS);
-    m_iter_ptr = nullptr;
+  // Type is '__node_pointer'
+  auto node_pointer_type = hash_iter_type.GetTypeTemplateArgument(0);
+  if (!node_pointer_type.IsValid())
+    return lldb::ChildCacheState::eRefetch;
 
-    if (addr == 0 || addr == LLDB_INVALID_ADDRESS)
-      return lldb::ChildCacheState::eRefetch;
+  // Cast the __hash_iterator to a __node_pointer (which stores our key/value
+  // pair)
+  auto hash_node_sp = hash_iter_sp->Cast(node_pointer_type);
+  if (!hash_node_sp)
+    return lldb::ChildCacheState::eRefetch;
 
-    auto ts = pair_type.GetTypeSystem();
-    auto ast_ctx = ts.dyn_cast_or_null<TypeSystemClang>();
-    if (!ast_ctx)
+  auto key_value_sp = hash_node_sp->GetChildMemberWithName("__value_");
+  if (!key_value_sp) {
+    // clang-format off
+    // Since D101206 (ba79fb2e1f), libc++ wraps the `__value_` in an
+    // anonymous union.
+    // Child 0: __hash_node_base base class
+    // Child 1: __hash_
+    // Child 2: anonymous union
+    // clang-format on
+    auto anon_union_sp = hash_node_sp->GetChildAtIndex(2);
+    if (!anon_union_sp)
       return lldb::ChildCacheState::eRefetch;
 
-    // Mimick layout of std::__hash_iterator::__node_ and read it in
-    // from process memory.
-    //
-    // The following shows the contiguous block of memory:
-    //
-    //         +-----------------------------+ class __hash_node_base
-    // __node_ | __next_pointer __next_;     |
-    //         +-----------------------------+ class __hash_node
-    //         | size_t __hash_;             |
-    //         | __node_value_type __value_; | <<< our key/value pair
-    //         +-----------------------------+
-    //
-    CompilerType tree_node_type = ast_ctx->CreateStructForIdentifier(
-        llvm::StringRef(),
-        {{"__next_",
-          ast_ctx->GetBasicType(lldb::eBasicTypeVoid).GetPointerType()},
-         {"__hash_", ast_ctx->GetBasicType(lldb::eBasicTypeUnsignedLongLong)},
-         {"__value_", pair_type}});
-    std::optional<uint64_t> size = tree_node_type.GetByteSize(nullptr);
-    if (!size)
-      return lldb::ChildCacheState::eRefetch;
-    WritableDataBufferSP buffer_sp(new DataBufferHeap(*size, 0));
-    ProcessSP process_sp(target_sp->GetProcessSP());
-    Status error;
-    process_sp->ReadMemory(addr, buffer_sp->GetBytes(),
-                           buffer_sp->GetByteSize(), error);
-    if (error.Fail())
+    key_value_sp = anon_union_sp->GetChildMemberWithName("__value_");
+    if (!key_value_sp)
       return lldb::ChildCacheState::eRefetch;
-    DataExtractor extractor(buffer_sp, process_sp->GetByteOrder(),
-                            process_sp->GetAddressByteSize());
-    auto pair_sp = CreateValueObjectFromData(
-        "pair", extractor, valobj_sp->GetExecutionContextRef(), tree_node_type);
-    if (pair_sp)
-      m_pair_sp = pair_sp->GetChildAtIndex(2);
   }
 
+  // Create the synthetic child, which is a pair where the key and value can be
+  // retrieved by querying the synthetic frontend for
+  // GetIndexOfChildWithName("first") and GetIndexOfChildWithName("second")
+  // respectively.
+  //
+  // std::unordered_map stores the actual key/value pair in
+  // __hash_value_type::__cc_ (or previously __cc).
+  auto potential_child_sp = key_value_sp->Clone(ConstString("pair"));
+  if (potential_child_sp)
+    if (potential_child_sp->GetNumChildrenIgnoringErrors() == 1)
+      if (auto child0_sp = potential_child_sp->GetChildAtIndex(0);
+          child0_sp->GetName() == "__cc_" || child0_sp->GetName() == "__cc")
+        potential_child_sp = child0_sp->Clone(ConstString("pair"));
+
+  m_pair_sp = potential_child_sp;
+
   return lldb::ChildCacheState::eRefetch;
 }
 

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py
index d9e316b9b8f4e..efd7128cd6ac7 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/TestDataFormatterLibccIterator.py
@@ -59,3 +59,19 @@ def cleanup():
 
         self.expect("frame variable svI", substrs=['item = "hello"'])
         self.expect("expr svI", substrs=['item = "hello"'])
+
+        self.expect("frame variable iiumI", substrs=["first = 61453", "second = 51966"])
+        self.expect("expr iiumI", substrs=["first = 61453", "second = 51966"])
+
+        self.expect("frame variable siumI", substrs=['first = "hello"', "second = 137"])
+        self.expect("expr siumI", substrs=['first = "hello"', "second = 137"])
+
+        self.expect("frame variable iiumI.first", substrs=["first = 61453"])
+        self.expect("frame variable iiumI.first", substrs=["second"], matching=False)
+        self.expect("frame variable iiumI.second", substrs=["second = 51966"])
+        self.expect("frame variable iiumI.second", substrs=["first"], matching=False)
+
+        self.expect("frame variable siumI.first", substrs=['first = "hello"'])
+        self.expect("frame variable siumI.first", substrs=["second"], matching=False)
+        self.expect("frame variable siumI.second", substrs=["second = 137"])
+        self.expect("frame variable siumI.second", substrs=["first"], matching=False)

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp
index 9d1cbfd912868..e53c0f167c325 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/iterator/main.cpp
@@ -1,38 +1,50 @@
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 
 typedef std::map<int, int> intint_map;
 typedef std::map<std::string, int> strint_map;
 
+typedef std::unordered_map<int, int> intint_umap;
+typedef std::unordered_map<std::string, int> strint_umap;
+
 typedef std::vector<int> int_vector;
 typedef std::vector<std::string> string_vector;
 
-typedef intint_map::iterator iimter;
-typedef strint_map::iterator simter;
+typedef intint_map::iterator ii_map_iter;
+typedef strint_map::iterator si_map_iter;
+typedef intint_umap::iterator ii_umap_iter;
+typedef strint_umap::iterator si_umap_iter;
 
 typedef int_vector::iterator ivter;
 typedef string_vector::iterator svter;
 
-int main()
-{
-	intint_map iim;
-	iim[0xABCD] = 0xF0F1;
+int main() {
+  intint_map iim;
+  iim[0xABCD] = 0xF0F1;
+
+  strint_map sim;
+  sim["world"] = 42;
+
+  intint_umap iium;
+  iium[0xF00D] = 0xCAFE;
 
-	strint_map sim;
-	sim["world"] = 42;
+  strint_umap sium;
+  sium["hello"] = 137;
 
-	int_vector iv;
-	iv.push_back(3);
+  int_vector iv;
+  iv.push_back(3);
 
-	string_vector sv;
-	sv.push_back("hello");
+  string_vector sv;
+  sv.push_back("hello");
 
-	iimter iimI = iim.begin();
-	simter simI = sim.begin();
+  ii_map_iter iimI = iim.begin();
+  si_map_iter simI = sim.begin();
+  ii_umap_iter iiumI = iium.begin();
+  si_umap_iter siumI = sium.begin();
 
-	ivter ivI = iv.begin();
-	svter svI = sv.begin();
+  ivter ivI = iv.begin();
+  svter svI = sv.begin();
 
-	return 0; // Set break point at this line.
+  return 0; // Set break point at this line.
 }


        


More information about the lldb-commits mailing list