[PATCH] D137028: [llvm][utils] Add DenseMap data formatters (WIP)

Sat Oct 29 15:24:11 PDT 2022

kastiglione created this revision.
kastiglione added a reviewer: jingham.
Herald added a project: All.
kastiglione requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Add summary and synthetic data formatters for `llvm::DenseMap`.

This implementation avoids expression evaluation by using a heuristic. However, as heuristics go, there is a corner case: a single deleted entry (a single "tombstone") will result in a child value with an invalid key but a valid value.

Instead of using `getEmptyKey()` and `getTombstoneKey()` to determine which buckets are empty, and which contain real key-values, the heuristic scans all buckets to identify keys that exist only once. These singleton keys are considered valid.

The empty key will always exist multiple times. However, the tombstone key may exist zero, one, or many times. The heuristic has no problems when there are zero or many tombstones, but when there is exactly one deleted entry (one tombstone), the heuristic will incorrectly identify it as valid.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137028

Files:
  llvm/utils/lldbDataFormatters.py


Index: llvm/utils/lldbDataFormatters.py
===================================================================

--- llvm/utils/lldbDataFormatters.py
+++ llvm/utils/lldbDataFormatters.py
@@ -4,8 +4,11 @@
 Load into LLDB with 'command script import /path/to/lldbDataFormatters.py'
 """
 
+import collections
 import lldb
 import json
+from typing import Optional
+
 
 def __lldb_init_module(debugger, internal_dict):
     debugger.HandleCommand('type category define -e llvm -l c++')
@@ -48,6 +51,12 @@
     debugger.HandleCommand('type synthetic add -w llvm '
                            '-l lldbDataFormatters.PointerUnionSynthProvider '
                            '-x "^llvm::PointerUnion<.+>$"')
+    debugger.HandleCommand('type summary add -w llvm '
+                           '-e -F lldbDataFormatters.DenseMapSummary '
+                           '-x "^llvm::DenseMap<.+>$"')
+    debugger.HandleCommand('type synthetic add -w llvm '
+                           '-l lldbDataFormatters.DenseMapSynthetic '
+                           '-x "^llvm::DenseMap<.+>$"')
 
 
 # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl
@@ -291,3 +300,105 @@
         self.val_expr_path = get_expression_path(self.valobj.GetChildMemberWithName('Val'))
         self.active_type_tag = self.valobj.CreateValueFromExpression('', f'(int){self.val_expr_path}.getInt()').GetValueAsSigned()
         self.template_args = parse_template_parameters(self.valobj.GetType().name)
+
+
+def DenseMapSummary(valobj: lldb.SBValue, _) -> str:
+    """Summarize an llvm::DenseMap as ``size=<NumEntries>``."""
+    # When a synthetic provider is registered for the type, LLDB hands the
+    # summary function the synthetic value, whose children are the map
+    # entries. Read NumEntries from the raw (non-synthetic) value instead.
+    raw_value = valobj.GetNonSyntheticValue()
+    num_entries = raw_value.GetChildMemberWithName("NumEntries").unsigned
+    return f"size={num_entries}"
+
+
+class DenseMapSynthetic:
+    """Synthetic children provider for llvm::DenseMap.
+
+    Exposes each occupied bucket as a child named ``[i]``. Occupied buckets
+    are found with a heuristic rather than by evaluating getEmptyKey() and
+    getTombstoneKey(): a key that occurs in exactly one bucket is treated as
+    a real entry. A map with exactly one tombstone therefore shows one bogus
+    child, since that tombstone key is also unique.
+    """
+
+    valobj: lldb.SBValue
+
+    # The bucket indexes containing child values.
+    child_buckets: list[int]
+
+    def __init__(self, valobj: lldb.SBValue, _) -> None:
+        self.valobj = valobj
+        # Start empty so the accessors are safe to call before update().
+        self.child_buckets = []
+
+    def num_children(self) -> int:
+        return len(self.child_buckets)
+
+    def get_child_at_index(self, child_index: int) -> Optional[lldb.SBValue]:
+        """Return child `child_index`, or None if it is out of range."""
+        # Reject negative indexes explicitly; Python would otherwise wrap them
+        # around to the end of the list.
+        if not 0 <= child_index < len(self.child_buckets):
+            return None
+
+        bucket_index = self.child_buckets[child_index]
+        entry = self.valobj.GetValueForExpressionPath(
+            f".Buckets[{bucket_index}]"
+        )
+
+        # Get the std::pair from the DenseMapPair
+        if entry.type.name.startswith("llvm::detail::DenseMapPair<"):
+            entry = entry.GetChildAtIndex(0)
+
+        return entry.Clone(f"[{child_index}]")
+
+    def get_child_index(self, name: str) -> Optional[int]:
+        """Return the index of the child named ``[N]``, or None if unknown."""
+        if not (name.startswith("[") and name.endswith("]")):
+            return None
+        digits = name[1:-1]
+        if not digits.isdecimal():
+            return None
+        child_index = int(digits)
+        return child_index if child_index < len(self.child_buckets) else None
+
+    def update(self) -> bool:
+        """Rescan the buckets and rebuild `child_buckets`.
+
+        Always returns False: LLDB treats a True result as "the cached
+        children are still valid" and stops calling update() on later stops,
+        which would freeze the displayed contents of the map.
+        """
+        self.child_buckets = []
+
+        num_entries = self.valobj.GetChildMemberWithName("NumEntries").unsigned
+        if num_entries == 0:
+            return False
+
+        buckets = self.valobj.GetChildMemberWithName("Buckets")
+        num_buckets = self.valobj.GetChildMemberWithName("NumBuckets").unsigned
+
+        # Each bucket contains one of the following:
+        #   1. Valid key-value
+        #   2. Empty key
+        #   3. Tombstone key (marks a deleted value)
+        #
+        # NumBuckets is always greater than NumEntries. The empty key, and
+        # potentially the tombstone key, will occur multiple times. A key that
+        # is repeated is either the empty key or the tombstone key.
+
+        # For each key, collect a list of buckets it appears in.
+        key_buckets: dict[str, list[int]] = collections.defaultdict(list)
+        for index in range(num_buckets):
+            key = buckets.GetValueForExpressionPath(f"[{index}].first")
+            key_buckets[str(key.data)].append(index)
+
+        # Heuristic: Repeated (non-unique) keys are either the empty key or
+        # the tombstone key. Populate child_buckets with the bucket indexes of
+        # entries containing unique keys.
+        for indexes in key_buckets.values():
+            if len(indexes) == 1:
+                self.child_buckets.append(indexes[0])
+
+        return False


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D137028.471790.patch
Type: text/x-patch
Size: 3923 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221029/207ed905/attachment.bin>