[llvm] 3526020 - Repply [memprof] Introduce FrameIdConverter and CallStackIdConverter (#90307)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 28 11:44:53 PDT 2024


Author: Kazu Hirata
Date: 2024-04-28T11:44:45-07:00
New Revision: 352602010fac5c7e07bc6b992848ab746d0c2857

URL: https://github.com/llvm/llvm-project/commit/352602010fac5c7e07bc6b992848ab746d0c2857
DIFF: https://github.com/llvm/llvm-project/commit/352602010fac5c7e07bc6b992848ab746d0c2857.diff

LOG: Repply [memprof] Introduce FrameIdConverter and CallStackIdConverter (#90307)

Currently, we convert FrameId to Frame and CallStackId to a call stack
at several places.  This patch unifies those into function objects --
FrameIdConverter and CallStackIdConverter.

The existing implementation of CallStackIdConverter, being removed in
this patch, handles both FrameId and CallStackId conversions.  This
patch splits it into two phases for flexibility (but make them
composable) because some places only require the FrameId conversion.

This iteration fixes a problem uncovered with ubsan, where we were
dereferencing an uninitialized std::unique_ptr.

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/include/llvm/ProfileData/MemProfReader.h
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/unittests/ProfileData/InstrProfTest.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index d378c3696f8d0bb..8b00faf2a219dfd 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -737,6 +737,64 @@ class CallStackLookupTrait {
 // Compute a CallStackId for a given call stack.
 CallStackId hashCallStack(ArrayRef<FrameId> CS);
 
+namespace detail {
+// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable.  We have
+// to do so in one of two 
diff erent ways depending on the type of the hash
+// table.
+template <typename value_type, typename IterTy>
+value_type DerefIterator(IterTy Iter) {
+  using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
+  if constexpr (std::is_same_v<deref_type, value_type>)
+    return *Iter;
+  else
+    return Iter->second;
+}
+} // namespace detail
+
+// A function object that returns a frame for a given FrameId.
+template <typename MapTy> struct FrameIdConverter {
+  std::optional<FrameId> LastUnmappedId;
+  MapTy ⤅
+
+  FrameIdConverter() = delete;
+  FrameIdConverter(MapTy &Map) : Map(Map) {}
+
+  Frame operator()(FrameId Id) {
+    auto Iter = Map.find(Id);
+    if (Iter == Map.end()) {
+      LastUnmappedId = Id;
+      return Frame(0, 0, 0, false);
+    }
+    return detail::DerefIterator<Frame>(Iter);
+  }
+};
+
+// A function object that returns a call stack for a given CallStackId.
+template <typename MapTy> struct CallStackIdConverter {
+  std::optional<CallStackId> LastUnmappedId;
+  MapTy ⤅
+  std::function<Frame(FrameId)> FrameIdToFrame;
+
+  CallStackIdConverter() = delete;
+  CallStackIdConverter(MapTy &Map, std::function<Frame(FrameId)> FrameIdToFrame)
+      : Map(Map), FrameIdToFrame(FrameIdToFrame) {}
+
+  llvm::SmallVector<Frame> operator()(CallStackId CSId) {
+    llvm::SmallVector<Frame> Frames;
+    auto CSIter = Map.find(CSId);
+    if (CSIter == Map.end()) {
+      LastUnmappedId = CSId;
+    } else {
+      llvm::SmallVector<FrameId> CS =
+          detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
+      Frames.reserve(CS.size());
+      for (FrameId Id : CS)
+        Frames.push_back(FrameIdToFrame(Id));
+    }
+    return Frames;
+  }
+};
+
 // Verify that each CallStackId is computed with hashCallStack.  This function
 // is intended to help transition from CallStack to CSId in
 // IndexedAllocationInfo.

diff  --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 444c58e8bdc8bc2..b42e4f597774099 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -76,20 +76,16 @@ class MemProfReader {
       Callback =
           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
 
-    auto CallStackCallback = [&](CallStackId CSId) {
-      llvm::SmallVector<Frame> CallStack;
-      auto Iter = CSIdToCallStack.find(CSId);
-      assert(Iter != CSIdToCallStack.end());
-      for (FrameId Id : Iter->second)
-        CallStack.push_back(Callback(Id));
-      return CallStack;
-    };
+    memprof::CallStackIdConverter<decltype(CSIdToCallStack)> CSIdConv(
+        CSIdToCallStack, Callback);
 
     const IndexedMemProfRecord &IndexedRecord = Iter->second;
     GuidRecord = {
         Iter->first,
-        IndexedRecord.toMemProfRecord(CallStackCallback),
+        IndexedRecord.toMemProfRecord(CSIdConv),
     };
+    if (CSIdConv.LastUnmappedId)
+      return make_error<InstrProfError>(instrprof_error::hash_mismatch);
     Iter++;
     return Error::success();
   }

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index cefb6af12d00218..ba21e01abfbaca5 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1520,54 +1520,38 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
 
   // Setup a callback to convert from frame ids to frame using the on-disk
   // FrameData hash table.
-  std::optional<memprof::FrameId> LastUnmappedFrameId;
-  auto IdToFrameCallback = [&](const memprof::FrameId Id) {
-    auto FrIter = MemProfFrameTable->find(Id);
-    if (FrIter == MemProfFrameTable->end()) {
-      LastUnmappedFrameId = Id;
-      return memprof::Frame(0, 0, 0, false);
-    }
-    return *FrIter;
-  };
-
-  // Setup a callback to convert call stack ids to call stacks using the on-disk
-  // hash table.
-  std::optional<memprof::CallStackId> LastUnmappedCSId;
-  auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
-    llvm::SmallVector<memprof::Frame> Frames;
-    auto CSIter = MemProfCallStackTable->find(CSId);
-    if (CSIter == MemProfCallStackTable->end()) {
-      LastUnmappedCSId = CSId;
-    } else {
-      const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
-      Frames.reserve(CS.size());
-      for (memprof::FrameId Id : CS)
-        Frames.push_back(IdToFrameCallback(Id));
-    }
-    return Frames;
-  };
+  memprof::FrameIdConverter<MemProfFrameHashTable> FrameIdConv(
+      *MemProfFrameTable.get());
 
   const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
   memprof::MemProfRecord Record;
-  if (MemProfCallStackTable)
-    Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
-  else
-    Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);
+  if (MemProfCallStackTable) {
+    // Setup a callback to convert call stack ids to call stacks using the
+    // on-disk hash table.
+    memprof::CallStackIdConverter<MemProfCallStackHashTable> CSIdConv(
+        *MemProfCallStackTable.get(), FrameIdConv);
 
-  // Check that all frame ids were successfully converted to frames.
-  if (LastUnmappedFrameId) {
-    return make_error<InstrProfError>(instrprof_error::hash_mismatch,
-                                      "memprof frame not found for frame id " +
-                                          Twine(*LastUnmappedFrameId));
+    Record = IndexedRecord.toMemProfRecord(CSIdConv);
+
+    // Check that all call stack ids were successfully converted to call stacks.
+    if (CSIdConv.LastUnmappedId) {
+      return make_error<InstrProfError>(
+          instrprof_error::hash_mismatch,
+          "memprof call stack not found for call stack id " +
+              Twine(*CSIdConv.LastUnmappedId));
+    }
+  } else {
+    Record = memprof::MemProfRecord(IndexedRecord, FrameIdConv);
   }
 
-  // Check that all call stack ids were successfully converted to call stacks.
-  if (LastUnmappedCSId) {
+  // Check that all frame ids were successfully converted to frames.
+  if (FrameIdConv.LastUnmappedId) {
     return make_error<InstrProfError>(
         instrprof_error::hash_mismatch,
-        "memprof call stack not found for call stack id " +
-            Twine(*LastUnmappedCSId));
+        "memprof frame not found for frame id " +
+            Twine(*FrameIdConv.LastUnmappedId));
   }
+
   return Record;
 }
 

diff  --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index edc427dcbc45400..acc633de11b6bd5 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -495,44 +495,6 @@ TEST_F(InstrProfTest, test_memprof_v0) {
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
-struct CallStackIdConverter {
-  std::optional<memprof::FrameId> LastUnmappedFrameId;
-  std::optional<memprof::CallStackId> LastUnmappedCSId;
-
-  const FrameIdMapTy &IdToFrameMap;
-  const CallStackIdMapTy &CSIdToCallStackMap;
-
-  CallStackIdConverter() = delete;
-  CallStackIdConverter(const FrameIdMapTy &IdToFrameMap,
-                       const CallStackIdMapTy &CSIdToCallStackMap)
-      : IdToFrameMap(IdToFrameMap), CSIdToCallStackMap(CSIdToCallStackMap) {}
-
-  llvm::SmallVector<memprof::Frame>
-  operator()(::llvm::memprof::CallStackId CSId) {
-    auto IdToFrameCallback = [&](const memprof::FrameId Id) {
-      auto Iter = IdToFrameMap.find(Id);
-      if (Iter == IdToFrameMap.end()) {
-        LastUnmappedFrameId = Id;
-        return memprof::Frame(0, 0, 0, false);
-      }
-      return Iter->second;
-    };
-
-    llvm::SmallVector<memprof::Frame> Frames;
-    auto CSIter = CSIdToCallStackMap.find(CSId);
-    if (CSIter == CSIdToCallStackMap.end()) {
-      LastUnmappedCSId = CSId;
-    } else {
-      const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
-          CSIter->getSecond();
-      Frames.reserve(CS.size());
-      for (::llvm::memprof::FrameId Id : CS)
-        Frames.push_back(IdToFrameCallback(Id));
-    }
-    return Frames;
-  }
-};
-
 TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
   const MemInfoBlock MIB = makeFullMIB();
 
@@ -562,14 +524,16 @@ TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
   ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
   const memprof::MemProfRecord &Record = RecordOr.get();
 
-  CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
+  memprof::FrameIdConverter<decltype(IdToFrameMap)> FrameIdConv(IdToFrameMap);
+  memprof::CallStackIdConverter<decltype(CSIdToCallStackMap)> CSIdConv(
+      CSIdToCallStackMap, FrameIdConv);
 
   const ::llvm::memprof::MemProfRecord WantRecord =
       IndexedMR.toMemProfRecord(CSIdConv);
-  ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
-      << "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
-  ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
-      << "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
+  ASSERT_EQ(FrameIdConv.LastUnmappedId, std::nullopt)
+      << "could not map frame id: " << *FrameIdConv.LastUnmappedId;
+  ASSERT_EQ(CSIdConv.LastUnmappedId, std::nullopt)
+      << "could not map call stack id: " << *CSIdConv.LastUnmappedId;
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
@@ -602,14 +566,16 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
   ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
   const memprof::MemProfRecord &Record = RecordOr.get();
 
-  CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
+  memprof::FrameIdConverter<decltype(IdToFrameMap)> FrameIdConv(IdToFrameMap);
+  memprof::CallStackIdConverter<decltype(CSIdToCallStackMap)> CSIdConv(
+      CSIdToCallStackMap, FrameIdConv);
 
   const ::llvm::memprof::MemProfRecord WantRecord =
       IndexedMR.toMemProfRecord(CSIdConv);
-  ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
-      << "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
-  ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
-      << "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
+  ASSERT_EQ(FrameIdConv.LastUnmappedId, std::nullopt)
+      << "could not map frame id: " << *FrameIdConv.LastUnmappedId;
+  ASSERT_EQ(CSIdConv.LastUnmappedId, std::nullopt)
+      << "could not map call stack id: " << *CSIdConv.LastUnmappedId;
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 98dacd3511e1d85..d031049cea14bf7 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -502,37 +502,15 @@ TEST(MemProf, IndexedMemProfRecordToMemProfRecord) {
   IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS3));
   IndexedRecord.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS4));
 
-  bool CSIdMissing = false;
-  bool FrameIdMissing = false;
-
-  auto Callback = [&](CallStackId CSId) -> llvm::SmallVector<Frame> {
-    llvm::SmallVector<Frame> CallStack;
-    llvm::SmallVector<FrameId> FrameIds;
-
-    auto Iter = CallStackIdMap.find(CSId);
-    if (Iter == CallStackIdMap.end())
-      CSIdMissing = true;
-    else
-      FrameIds = Iter->second;
-
-    for (FrameId Id : FrameIds) {
-      Frame F(0, 0, 0, false);
-      auto Iter = FrameIdMap.find(Id);
-      if (Iter == FrameIdMap.end())
-        FrameIdMissing = true;
-      else
-        F = Iter->second;
-      CallStack.push_back(F);
-    }
-
-    return CallStack;
-  };
-
-  MemProfRecord Record = IndexedRecord.toMemProfRecord(Callback);
+  llvm::memprof::FrameIdConverter<decltype(FrameIdMap)> FrameIdConv(FrameIdMap);
+  llvm::memprof::CallStackIdConverter<decltype(CallStackIdMap)> CSIdConv(
+      CallStackIdMap, FrameIdConv);
+
+  MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
 
   // Make sure that all lookups are successful.
-  ASSERT_FALSE(CSIdMissing);
-  ASSERT_FALSE(FrameIdMissing);
+  ASSERT_EQ(FrameIdConv.LastUnmappedId, std::nullopt);
+  ASSERT_EQ(CSIdConv.LastUnmappedId, std::nullopt);
 
   // Verify the contents of Record.
   ASSERT_THAT(Record.AllocSites, SizeIs(2));


        


More information about the llvm-commits mailing list