[llvm-branch-commits] [mlir] [MLIR][LLVM] Use CyclicReplacerCache for recursive DIType import (PR #98203)

Billy Zhu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 9 12:35:08 PDT 2024


https://github.com/zyx-billy updated https://github.com/llvm/llvm-project/pull/98203

>From a0b59b246a1b8860fad16f1f74537c38b5abf2cf Mon Sep 17 00:00:00 2001
From: Billy Zhu <billyzhu at modular.com>
Date: Tue, 9 Jul 2024 10:28:21 -0700
Subject: [PATCH] use cyclic cache in importer and update tests

---
 mlir/lib/Target/LLVMIR/DebugImporter.cpp     | 143 ++++---------------
 mlir/lib/Target/LLVMIR/DebugImporter.h       | 101 ++-----------
 mlir/test/Target/LLVMIR/Import/debug-info.ll |  34 +++--
 3 files changed, 51 insertions(+), 227 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
index 15737aa681c59..f104b72209c39 100644
--- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp
+++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
@@ -28,7 +28,7 @@ using namespace mlir::LLVM::detail;
 
 DebugImporter::DebugImporter(ModuleOp mlirModule,
                              bool dropDICompositeTypeElements)
-    : recursionPruner(mlirModule.getContext()),
+    : cache([&](llvm::DINode *node) { return createRecSelf(node); }),
       context(mlirModule.getContext()), mlirModule(mlirModule),
       dropDICompositeTypeElements(dropDICompositeTypeElements) {}
 
@@ -287,16 +287,9 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) {
     return nullptr;
 
   // Check for a cached instance.
-  if (DINodeAttr attr = nodeToAttr.lookup(node))
-    return attr;
-
-  // Register with the recursive translator. If it can be handled without
-  // recursing into it, return the result immediately.
-  if (DINodeAttr attr = recursionPruner.pruneOrPushTranslationStack(node))
-    return attr;
-
-  auto guard = llvm::make_scope_exit(
-      [&]() { recursionPruner.popTranslationStack(node); });
+  auto cacheEntry = cache.lookupOrInit(node);
+  if (std::optional<DINodeAttr> result = cacheEntry.get())
+    return *result;
 
   // Convert the debug metadata if possible.
   auto translateNode = [this](llvm::DINode *node) -> DINodeAttr {
@@ -335,20 +328,20 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) {
     return nullptr;
   };
   if (DINodeAttr attr = translateNode(node)) {
-    auto [result, isSelfContained] =
-        recursionPruner.finalizeTranslation(node, attr);
-    // Only cache fully self-contained nodes.
-    if (isSelfContained)
-      nodeToAttr.try_emplace(node, result);
-    return result;
+    // If this node was repeated, lookup its recursive ID and assign it to the
+    // base result.
+    if (cacheEntry.wasRepeated()) {
+      DistinctAttr recId = nodeToRecId.lookup(node);
+      auto recType = cast<DIRecursiveTypeAttrInterface>(attr);
+      attr = cast<DINodeAttr>(recType.withRecId(recId));
+    }
+    cacheEntry.resolve(attr);
+    return attr;
   }
+  cacheEntry.resolve(nullptr);
   return nullptr;
 }
 
-//===----------------------------------------------------------------------===//
-// RecursionPruner
-//===----------------------------------------------------------------------===//
-
 /// Get the `getRecSelf` constructor for the translated type of `node` if its
 /// translated DITypeAttr supports recursion. Otherwise, returns nullptr.
 static function_ref<DIRecursiveTypeAttrInterface(DistinctAttr)>
@@ -361,104 +354,20 @@ getRecSelfConstructor(llvm::DINode *node) {
       .Default(CtorType());
 }
 
-DINodeAttr DebugImporter::RecursionPruner::pruneOrPushTranslationStack(
-    llvm::DINode *node) {
-  // If the node type is capable of being recursive, check if it's seen
-  // before.
+std::optional<DINodeAttr> DebugImporter::createRecSelf(llvm::DINode *node) {
   auto recSelfCtor = getRecSelfConstructor(node);
-  if (recSelfCtor) {
-    // If a cyclic dependency is detected since the same node is being
-    // traversed twice, emit a recursive self type, and mark the duplicate
-    // node on the translationStack so it can emit a recursive decl type.
-    auto [iter, inserted] = translationStack.try_emplace(node);
-    if (!inserted) {
-      // The original node may have already been assigned a recursive ID from
-      // a different self-reference. Use that if possible.
-      DIRecursiveTypeAttrInterface recSelf = iter->second.recSelf;
-      if (!recSelf) {
-        DistinctAttr recId = nodeToRecId.lookup(node);
-        if (!recId) {
-          recId = DistinctAttr::create(UnitAttr::get(context));
-          nodeToRecId[node] = recId;
-        }
-        recSelf = recSelfCtor(recId);
-        iter->second.recSelf = recSelf;
-      }
-      // Inject the self-ref into the previous layer.
-      translationStack.back().second.unboundSelfRefs.insert(recSelf);
-      return cast<DINodeAttr>(recSelf);
-    }
+  if (!recSelfCtor)
+    return std::nullopt;
+
+  // The original node may have already been assigned a recursive ID from
+  // a different self-reference. Use that if possible.
+  DistinctAttr recId = nodeToRecId.lookup(node);
+  if (!recId) {
+    recId = DistinctAttr::create(UnitAttr::get(context));
+    nodeToRecId[node] = recId;
   }
-
-  return lookup(node);
-}
-
-std::pair<DINodeAttr, bool>
-DebugImporter::RecursionPruner::finalizeTranslation(llvm::DINode *node,
-                                                    DINodeAttr result) {
-  // If `node` is not a potentially recursive type, it will not be on the
-  // translation stack. Nothing to set in this case.
-  if (translationStack.empty())
-    return {result, true};
-  if (translationStack.back().first != node)
-    return {result, translationStack.back().second.unboundSelfRefs.empty()};
-
-  TranslationState &state = translationStack.back().second;
-
-  // If this node is actually recursive, set the recId onto `result`.
-  if (DIRecursiveTypeAttrInterface recSelf = state.recSelf) {
-    auto recType = cast<DIRecursiveTypeAttrInterface>(result);
-    result = cast<DINodeAttr>(recType.withRecId(recSelf.getRecId()));
-    // Remove this recSelf from the set of unbound selfRefs.
-    state.unboundSelfRefs.erase(recSelf);
-  }
-
-  // Insert the result into our internal cache if it's not self-contained.
-  if (!state.unboundSelfRefs.empty()) {
-    [[maybe_unused]] auto [_, inserted] = dependentCache.try_emplace(
-        node, DependentTranslation{result, state.unboundSelfRefs});
-    assert(inserted && "invalid state: caching the same DINode twice");
-    return {result, false};
-  }
-  return {result, true};
-}
-
-void DebugImporter::RecursionPruner::popTranslationStack(llvm::DINode *node) {
-  // If `node` is not a potentially recursive type, it will not be on the
-  // translation stack. Nothing to handle in this case.
-  if (translationStack.empty() || translationStack.back().first != node)
-    return;
-
-  // At the end of the stack, all unbound self-refs must be resolved already,
-  // and the entire cache should be accounted for.
-  TranslationState &currLayerState = translationStack.back().second;
-  if (translationStack.size() == 1) {
-    assert(currLayerState.unboundSelfRefs.empty() &&
-           "internal error: unbound recursive self reference at top level.");
-    translationStack.pop_back();
-    return;
-  }
-
-  // Copy unboundSelfRefs down to the previous level.
-  TranslationState &nextLayerState = (++translationStack.rbegin())->second;
-  nextLayerState.unboundSelfRefs.insert(currLayerState.unboundSelfRefs.begin(),
-                                        currLayerState.unboundSelfRefs.end());
-  translationStack.pop_back();
-}
-
-DINodeAttr DebugImporter::RecursionPruner::lookup(llvm::DINode *node) {
-  auto cacheIter = dependentCache.find(node);
-  if (cacheIter == dependentCache.end())
-    return {};
-
-  DependentTranslation &entry = cacheIter->second;
-  if (llvm::set_is_subset(entry.unboundSelfRefs,
-                          translationStack.back().second.unboundSelfRefs))
-    return entry.attr;
-
-  // Stale cache entry.
-  dependentCache.erase(cacheIter);
-  return {};
+  DIRecursiveTypeAttrInterface recSelf = recSelfCtor(recId);
+  return cast<DINodeAttr>(recSelf);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.h b/mlir/lib/Target/LLVMIR/DebugImporter.h
index fac86dbe2cdd2..4a2bf35c160e1 100644
--- a/mlir/lib/Target/LLVMIR/DebugImporter.h
+++ b/mlir/lib/Target/LLVMIR/DebugImporter.h
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
+#include "mlir/Support/CyclicReplacerCache.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 
 namespace mlir {
@@ -87,102 +88,18 @@ class DebugImporter {
   /// for it, or create a new one if not.
   DistinctAttr getOrCreateDistinctID(llvm::DINode *node);
 
-  /// A mapping between LLVM debug metadata and the corresponding attribute.
-  DenseMap<llvm::DINode *, DINodeAttr> nodeToAttr;
+  std::optional<DINodeAttr> createRecSelf(llvm::DINode *node);
+
   /// A mapping between distinct LLVM debug metadata nodes and the corresponding
   /// distinct id attribute.
   DenseMap<llvm::DINode *, DistinctAttr> nodeToDistinctAttr;
 
-  /// Translation helper for recursive DINodes.
-  /// Works alongside a stack-based DINode translator (the "main translator")
-  /// for gracefully handling DINodes that are recursive.
-  ///
-  /// Usage:
-  /// - Before translating a node, call `pruneOrPushTranslationStack` to see if
-  ///   the pruner can preempt this translation. If this is a node that the
-  ///   pruner already knows how to handle, it will return the translated
-  ///   DINodeAttr.
-  /// - After a node is successfully translated by the main translator, call
-  ///   `finalizeTranslation` to save the translated result with the pruner, and
-  ///   give it a chance to further modify the result.
-  /// - Regardless of success or failure by the main translator, always call
-  ///   `popTranslationStack` at the end of translating a node. This is
-  ///   necessary to keep the internal book-keeping in sync.
-  ///
-  /// This helper maintains an internal cache so that no recursive type will
-  /// be translated more than once by the main translator.
-  /// This internal cache is different from the cache maintained by the main
-  /// translator because it may store nodes that are not self-contained (i.e.
-  /// contain unbounded recursive self-references).
-  class RecursionPruner {
-  public:
-    RecursionPruner(MLIRContext *context) : context(context) {}
-
-    /// If this node is a recursive instance that was previously seen, returns a
-    /// self-reference. If this node was previously cached, returns the cached
-    /// result. Otherwise, returns null attr, and a translation stack frame is
-    /// created for this node. Expects `finalizeTranslation` &
-    /// `popTranslationStack` to be called on this node later.
-    DINodeAttr pruneOrPushTranslationStack(llvm::DINode *node);
-
-    /// Register the translated result of `node`. Returns the finalized result
-    /// (with recId if recursive) and whether the result is self-contained
-    /// (i.e. contains no unbound self-refs).
-    std::pair<DINodeAttr, bool> finalizeTranslation(llvm::DINode *node,
-                                                    DINodeAttr result);
-
-    /// Pop off a frame from the translation stack after a node is done being
-    /// translated.
-    void popTranslationStack(llvm::DINode *node);
-
-  private:
-    /// Returns the cached result (if exists) or null.
-    /// The cache entry will be removed if not all of its dependent self-refs
-    /// exists.
-    DINodeAttr lookup(llvm::DINode *node);
-
-    MLIRContext *context;
-
-    /// A cached translation that contains the translated attribute as well
-    /// as any unbound self-references that it depends on.
-    struct DependentTranslation {
-      /// The translated attr. May contain unbound self-references for other
-      /// recursive attrs.
-      DINodeAttr attr;
-      /// The set of unbound self-refs that this cached entry refers to. All
-      /// these self-refs must exist for the cached entry to be valid.
-      DenseSet<DIRecursiveTypeAttrInterface> unboundSelfRefs;
-    };
-    /// A mapping between LLVM debug metadata and the corresponding attribute.
-    /// Only contains those with unboundSelfRefs. Fully self-contained attrs
-    /// will be cached by the outer main translator.
-    DenseMap<llvm::DINode *, DependentTranslation> dependentCache;
-
-    /// Each potentially recursive node will have a TranslationState pushed onto
-    /// the `translationStack` to keep track of whether this node is actually
-    /// recursive (i.e. has self-references inside), and other book-keeping.
-    struct TranslationState {
-      /// The rec-self if this node is indeed a recursive node (i.e. another
-      /// instance of itself is seen while translating it). Null if this node
-      /// has not been seen again deeper in the translation stack.
-      DIRecursiveTypeAttrInterface recSelf;
-      /// All the unbound recursive self references in this layer of the
-      /// translation stack.
-      DenseSet<DIRecursiveTypeAttrInterface> unboundSelfRefs;
-    };
-    /// A stack that stores the metadata nodes that are being traversed. The
-    /// stack is used to handle cyclic dependencies during metadata translation.
-    /// Each node is pushed with an empty TranslationState. If it is ever seen
-    /// later when the stack is deeper, the node is recursive, and its
-    /// TranslationState is assigned a recSelf.
-    llvm::MapVector<llvm::DINode *, TranslationState> translationStack;
-
-    /// A mapping between DINodes that are recursive, and their assigned recId.
-    /// This is kept so that repeated occurrences of the same node can reuse the
-    /// same ID and be deduplicated.
-    DenseMap<llvm::DINode *, DistinctAttr> nodeToRecId;
-  };
-  RecursionPruner recursionPruner;
+  /// A mapping between DINodes that are recursive, and their assigned recId.
+  /// This is kept so that repeated occurrences of the same node can reuse the
+  /// same ID and be deduplicated.
+  DenseMap<llvm::DINode *, DistinctAttr> nodeToRecId;
+
+  CyclicReplacerCache<llvm::DINode *, DINodeAttr> cache;
 
   MLIRContext *context;
   ModuleOp mlirModule;
diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll
index 2173cc33e68a4..a194ecbf2eb20 100644
--- a/mlir/test/Target/LLVMIR/Import/debug-info.ll
+++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll
@@ -308,16 +308,16 @@ define void @class_method() {
 }
 
 ; Verify the cyclic composite type is identified, even though conversion begins from the subprogram type.
-; CHECK: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
-; CHECK: #[[COMP_PTR:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]], sizeInBits = 64>
-; CHECK: #[[SP_TYPE:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR]]>
-; CHECK: #[[SP_INNER:.+]] = #llvm.di_subprogram<id = [[SP_ID:.+]], compileUnit = #{{.*}}, scope = #[[COMP_SELF]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE]]>
-; CHECK: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_name", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[SP_INNER]]>
+; CHECK-DAG: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
+; CHECK-DAG: #[[COMP_PTR:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]], sizeInBits = 64>
+; CHECK-DAG: #[[SP_TYPE:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR]]>
+; CHECK-DAG: #[[SP_INNER:.+]] = #llvm.di_subprogram<id = [[SP_ID:.+]], compileUnit = #{{.*}}, scope = #[[COMP_SELF]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE]]>
+; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_name", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[SP_INNER]]>
 
-; CHECK: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]], sizeInBits = 64>
-; CHECK: #[[SP_TYPE_OUTER:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR_OUTER]]>
-; CHECK: #[[SP_OUTER:.+]] = #llvm.di_subprogram<id = [[SP_ID]], compileUnit = #{{.*}}, scope = #[[COMP]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE_OUTER]]>
-; CHECK: #[[LOC]] = loc(fused<#[[SP_OUTER]]>
+; CHECK-DAG: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]], sizeInBits = 64>
+; CHECK-DAG: #[[SP_TYPE_OUTER:.+]] = #llvm.di_subroutine_type<types = #{{.*}}, #[[COMP_PTR_OUTER]]>
+; CHECK-DAG: #[[SP_OUTER:.+]] = #llvm.di_subprogram<id = [[SP_ID]], compileUnit = #{{.*}}, scope = #[[COMP]], name = "class_method", file = #{{.*}}, subprogramFlags = Definition, type = #[[SP_TYPE_OUTER]]>
+; CHECK-DAG: #[[LOC]] = loc(fused<#[[SP_OUTER]]>
 
 !llvm.dbg.cu = !{!1}
 !llvm.module.flags = !{!0}
@@ -335,12 +335,12 @@ define void @class_method() {
 ; // -----
 
 ; Verify the cyclic composite type is handled correctly.
-; CHECK: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
-; CHECK: #[[COMP_PTR_INNER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]]>
-; CHECK: #[[FIELD:.+]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "call_field", baseType = #[[COMP_PTR_INNER]]>
-; CHECK: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_field", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[FIELD]]>
-; CHECK: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]]>
-; CHECK: #[[VAR0:.+]] = #llvm.di_local_variable<scope = #{{.*}}, name = "class_field", file = #{{.*}}, type = #[[COMP_PTR_OUTER]]>
+; CHECK-DAG: #[[COMP_SELF:.+]] = #llvm.di_composite_type<tag = DW_TAG_null, recId = [[REC_ID:.+]]>
+; CHECK-DAG: #[[COMP_PTR_INNER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP_SELF]]>
+; CHECK-DAG: #[[FIELD:.+]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "call_field", baseType = #[[COMP_PTR_INNER]]>
+; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type<tag = DW_TAG_class_type, recId = [[REC_ID]], name = "class_field", file = #{{.*}}, line = 42, flags = "TypePassByReference|NonTrivial", elements = #[[FIELD]]>
+; CHECK-DAG: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, baseType = #[[COMP]]>
+; CHECK-DAG: #[[VAR0:.+]] = #llvm.di_local_variable<scope = #{{.*}}, name = "class_field", file = #{{.*}}, type = #[[COMP_PTR_OUTER]]>
 
 ; CHECK: @class_field
 ; CHECK-SAME:  %[[ARG0:[a-zA-Z0-9]+]]
@@ -727,9 +727,7 @@ define void @class_field(ptr %arg1) !dbg !18 {
 ; CHECK-DAG: #[[B_TO_C]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_FROM_B:.+]]>
 ; CHECK-DAG: #[[C_FROM_B]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID:.+]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF:.+]], #[[TO_B_SELF:.+]], #[[TO_C_SELF:.+]]>
 
-; CHECK-DAG: #[[C_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF]], #[[TO_B_INNER:.+]], #[[TO_C_SELF]]
-; CHECK-DAG: #[[TO_B_INNER]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_INNER:.+]]>
-; CHECK-DAG: #[[B_INNER]] = #llvm.di_composite_type<{{.*}}name = "B", {{.*}}elements = #[[TO_C_SELF]]>
+; CHECK-DAG: #[[C_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF]], #[[A_TO_B:.+]], #[[TO_C_SELF]]
 
 ; CHECK-DAG: #[[TO_A_SELF]] = #llvm.di_derived_type<{{.*}}name = "->A", {{.*}}baseType = #[[A_SELF:.+]]>
 ; CHECK-DAG: #[[TO_B_SELF]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_SELF:.+]]>



More information about the llvm-branch-commits mailing list