[llvm] 78c9122 - [llvm][utils] Add DenseMap data formatters

Wed Jul 19 13:22:11 PDT 2023

Author: Dave Lee
Date: 2023-07-19T13:21:49-07:00
New Revision: 78c9122ac9ad5562d65852eb38322f131cffbfd6

URL: https://github.com/llvm/llvm-project/commit/78c9122ac9ad5562d65852eb38322f131cffbfd6
DIFF: https://github.com/llvm/llvm-project/commit/78c9122ac9ad5562d65852eb38322f131cffbfd6.diff

LOG: [llvm][utils] Add DenseMap data formatters

Add summary and synthetic data formatters for `llvm::DenseMap`.

This implementation avoids expression evaluation by using a heuristic. However, as
heuristics go, there is a corner case: a single deleted entry (a single "tombstone")
will result in a child value with an invalid key but a valid value.

Instead of calling `getEmptyKey()` and `getTombstoneKey()` to determine which buckets
are empty, and which contain real key-values, the heuristic scans all buckets to
identify keys that exist only once. These singleton keys are considered valid.

The empty key will always occur multiple times. However, the tombstone key may occur
zero, one, or many times. The heuristic has no problems when there are zero or many
tombstones, but when there is exactly one deleted entry (one tombstone), the
heuristic will incorrectly identify it as valid.

Differential Revision: https://reviews.llvm.org/D137028

Added: 
    

Modified: 
    llvm/utils/lldbDataFormatters.py

Removed: 
    


################################################################################
diff  --git a/llvm/utils/lldbDataFormatters.py b/llvm/utils/lldbDataFormatters.py
index 29c17adf382e52..9ead7bfa4f7dd1 100644

--- a/llvm/utils/lldbDataFormatters.py
+++ b/llvm/utils/lldbDataFormatters.py
@@ -4,6 +4,7 @@
 Load into LLDB with 'command script import /path/to/lldbDataFormatters.py'
 """
 
+import collections
 import lldb
 import json
 
@@ -80,6 +81,17 @@ def __lldb_init_module(debugger, internal_dict):
     #     '-x "^llvm::PointerUnion<.+>$"'
     # )
 
+    debugger.HandleCommand(
+        "type summary add -w llvm "
+        "-e -F lldbDataFormatters.DenseMapSummary "
+        '-x "^llvm::DenseMap<.+>$"'
+    )
+    debugger.HandleCommand(
+        "type synthetic add -w llvm "
+        "-l lldbDataFormatters.DenseMapSynthetic "
+        '-x "^llvm::DenseMap<.+>$"'
+    )
+
 
 # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl
 class SmallVectorSynthProvider:
@@ -341,3 +353,76 @@ def update(self):
             "", f"(int){self.val_expr_path}.getInt()"
         ).GetValueAsSigned()
         self.template_args = parse_template_parameters(self.valobj.GetType().name)
+
+
+def DenseMapSummary(valobj: lldb.SBValue, _) -> str:
+    raw_value = valobj.GetNonSyntheticValue()
+    num_entries = raw_value.GetChildMemberWithName("NumEntries").unsigned
+    num_tombstones = raw_value.GetChildMemberWithName("NumTombstones").unsigned
+
+    summary = f"size={num_entries}"
+    if num_tombstones == 1:
+        # The heuristic to identify valid entries does not handle the case of a
+        # single tombstone. The summary calls attention to this.
+        summary = f"tombstones=1, {summary}"
+    return summary
+
+
+class DenseMapSynthetic:
+    valobj: lldb.SBValue
+
+    # The indexes into `Buckets` that contain valid map entries.
+    child_buckets: list[int]
+
+    def __init__(self, valobj: lldb.SBValue, _) -> None:
+        self.valobj = valobj
+
+    def num_children(self) -> int:
+        return len(self.child_buckets)
+
+    def get_child_at_index(self, child_index: int) -> lldb.SBValue:
+        bucket_index = self.child_buckets[child_index]
+        entry = self.valobj.GetValueForExpressionPath(f".Buckets[{bucket_index}]")
+
+        # By default, DenseMap instances use DenseMapPair to hold key-value
+        # entries. When the entry is a DenseMapPair, unwrap it to expose the
+        # children as simple std::pair values.
+        #
+        # This entry type is customizable (a template parameter). For other
+        # types, expose the entry type as is.
+        if entry.type.name.startswith("llvm::detail::DenseMapPair<"):
+            entry = entry.GetChildAtIndex(0)
+
+        return entry.Clone(f"[{child_index}]")
+
+    def update(self):
+        self.child_buckets = []
+
+        num_entries = self.valobj.GetChildMemberWithName("NumEntries").unsigned
+        if num_entries == 0:
+            return
+
+        buckets = self.valobj.GetChildMemberWithName("Buckets")
+        num_buckets = self.valobj.GetChildMemberWithName("NumBuckets").unsigned
+
+        # Bucket entries contain one of the following:
+        #   1. Valid key-value
+        #   2. Empty key
+        #   3. Tombstone key (a deleted entry)
+        #
+        # NumBuckets is always greater than NumEntries. The empty key, and
+        # potentially the tombstone key, will occur multiple times. A key that
+        # is repeated is either the empty key or the tombstone key.
+
+        # For each key, collect a list of buckets it appears in.
+        key_buckets: dict[str, list[int]] = collections.defaultdict(list)
+        for index in range(num_buckets):
+            key = buckets.GetValueForExpressionPath(f"[{index}].first")
+            key_buckets[str(key.data)].append(index)
+
+        # Heuristic: This is not a multi-map, so any repeated (non-unique) keys are
+        # either the empty key or the tombstone key. Populate child_buckets
+        # with the indexes of entries containing unique keys.
+        for indexes in key_buckets.values():
+            if len(indexes) == 1:
+                self.child_buckets.append(indexes[0])