[libunwind] c53c205 - Cache uwnind frame headers as they are found.

Sterling Augustine via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 12 10:54:32 PDT 2020


Author: Sterling Augustine
Date: 2020-03-12T10:53:33-07:00
New Revision: c53c2058ffb8ff877702bb2dded31c85c1dfe66d

URL: https://github.com/llvm/llvm-project/commit/c53c2058ffb8ff877702bb2dded31c85c1dfe66d
DIFF: https://github.com/llvm/llvm-project/commit/c53c2058ffb8ff877702bb2dded31c85c1dfe66d.diff

LOG: Cache uwnind frame headers as they are found.

Summary:
This improves unwind performance quite substantially, and follows
a somewhat similar approach used in libgcc_s as described in the
thread here:

https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html

On certain extremely exception heavy internal tests, the time
drops from about 80 minutes to about five minutes.

Subscribers: libcxx-commits

Tags: #libc

Differential Revision: https://reviews.llvm.org/D75954

Added: 
    libunwind/src/FrameHeaderCache.hpp
    libunwind/test/frameheadercache_test.pass.cpp

Modified: 
    libunwind/src/AddressSpace.hpp

Removed: 
    


################################################################################
diff  --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index 83af9aeaef77..a4564cb67328 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -452,6 +452,11 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
     #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
   #endif
 
+#include "FrameHeaderCache.hpp"
+
+// There should be just one of these per process.
+static FrameHeaderCache ProcessFrameHeaderCache;
+
 static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
                                dl_iterate_cb_data *cbdata) {
   if (phdr->p_type == PT_LOAD) {
@@ -466,10 +471,13 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
   return false;
 }
 
-int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
+int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
+                             void *data) {
   auto cbdata = static_cast<dl_iterate_cb_data *>(data);
   if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
     return 0;
+  if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
+    return 1;
 
   Elf_Addr image_base = calculateImageBase(pinfo);
   bool found_obj = false;
@@ -496,8 +504,10 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
     } else if (!found_obj) {
       found_obj = checkAddrInSegment(phdr, image_base, cbdata);
     }
-    if (found_obj && found_hdr)
+    if (found_obj && found_hdr) {
+      ProcessFrameHeaderCache.add(cbdata->sects);
       return 1;
+    }
   }
   cbdata->sects->dwarf_section_length = 0;
   return 0;

diff  --git a/libunwind/src/FrameHeaderCache.hpp b/libunwind/src/FrameHeaderCache.hpp
new file mode 100644
index 000000000000..813fcd408b26
--- /dev/null
+++ b/libunwind/src/FrameHeaderCache.hpp
@@ -0,0 +1,149 @@
+//===-FrameHeaderCache.hpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Cache the elf program headers necessary to unwind the stack more efficiently
+// in the presence of many dsos.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __FRAMEHEADER_CACHE_HPP__
+#define __FRAMEHEADER_CACHE_HPP__
+
+#include "config.h"
+#include <limits.h>
+
+#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)                            \
+  _LIBUNWIND_LOG(msg, __VA_ARGS__)
+#else
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
+#endif
+
+// This cache should only be be used from within a dl_iterate_phdr callback.
+// dl_iterate_phdr does the necessary synchronization to prevent problems
+// with concurrent access via the libc load lock. Adding synchronization
+// for other uses is possible, but not currently done.
+
+class _LIBUNWIND_HIDDEN FrameHeaderCache {
+  struct CacheEntry {
+    uintptr_t LowPC() { return Info.dso_base; };
+    uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
+    UnwindInfoSections Info;
+    CacheEntry *Next;
+  };
+
+  static const size_t kCacheEntryCount = 8;
+
+  // Can't depend on the C++ standard library in libunwind, so use an array to
+  // allocate the entries, and two linked lists for ordering unused and recently
+  // used entries.  FIXME: Would the the extra memory for a doubly-linked list
+  // be better than the runtime cost of traversing a very short singly-linked
+  // list on a cache miss? The entries themselves are all small and consecutive,
+  // so unlikely to cause page faults when following the pointers. The memory
+  // spent on additional pointers could also be spent on more entries.
+
+  CacheEntry Entries[kCacheEntryCount];
+  CacheEntry *MostRecentlyUsed;
+  CacheEntry *Unused;
+
+  void resetCache() {
+    _LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
+    MostRecentlyUsed = nullptr;
+    Unused = &Entries[0];
+    for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
+      Entries[i].Next = &Entries[i + 1];
+    }
+    Entries[kCacheEntryCount - 1].Next = nullptr;
+  }
+
+  bool cacheNeedsReset(dl_phdr_info *PInfo) {
+    // C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
+    // loading and unloading shared libraries. If these values change between
+    // iterations of dl_iterate_phdr, then invalidate the cache.
+
+    // These are static to avoid needing an initializer, and unsigned long long
+    // because that is their type within the extended dl_phdr_info.  Initialize
+    // these to something extremely unlikely to be found upon the first call to
+    // dl_iterate_phdr.
+    static unsigned long long LastAdds = ULLONG_MAX;
+    static unsigned long long LastSubs = ULLONG_MAX;
+    if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
+      // Resetting the entire cache is a big hammer, but this path is rare--
+      // usually just on the very first call, when the cache is empty anyway--so
+      // added complexity doesn't buy much.
+      LastAdds = PInfo->dlpi_adds;
+      LastSubs = PInfo->dlpi_subs;
+      resetCache();
+      return true;
+    }
+    return false;
+  }
+
+public:
+  bool find(dl_phdr_info *PInfo, size_t, void *data) {
+    if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
+      return false;
+
+    auto *CBData = static_cast<dl_iterate_cb_data *>(data);
+    CacheEntry *Current = MostRecentlyUsed;
+    CacheEntry *Previous = nullptr;
+    while (Current != nullptr) {
+      _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
+          "FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
+          Current->LowPC(), Current->HighPC());
+      if (Current->LowPC() <= CBData->targetAddr &&
+          CBData->targetAddr < Current->HighPC()) {
+        _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
+            "FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
+            Current->LowPC(), Current->HighPC());
+        if (Previous) {
+          // If there is no Previous, then Current is already the
+          // MostRecentlyUsed, and no need to move it up.
+          Previous->Next = Current->Next;
+          Current->Next = MostRecentlyUsed;
+          MostRecentlyUsed = Current;
+        }
+        *CBData->sects = Current->Info;
+        return true;
+      }
+      Previous = Current;
+      Current = Current->Next;
+    }
+    _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
+                                      CBData->targetAddr);
+    return false;
+  }
+
+  void add(const UnwindInfoSections *UIS) {
+    CacheEntry *Current = nullptr;
+
+    if (Unused != nullptr) {
+      Current = Unused;
+      Unused = Unused->Next;
+    } else {
+      Current = MostRecentlyUsed;
+      CacheEntry *Previous = nullptr;
+      while (Current->Next != nullptr) {
+        Previous = Current;
+        Current = Current->Next;
+      }
+      Previous->Next = nullptr;
+      _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
+                                        Current->LowPC(), Current->HighPC());
+    }
+
+    Current->Info = *UIS;
+    Current->Next = MostRecentlyUsed;
+    MostRecentlyUsed = Current;
+    _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
+                                      MostRecentlyUsed->LowPC(),
+                                      MostRecentlyUsed->HighPC());
+  }
+};
+
+#endif // __FRAMEHEADER_CACHE_HPP__

diff  --git a/libunwind/test/frameheadercache_test.pass.cpp b/libunwind/test/frameheadercache_test.pass.cpp
new file mode 100644
index 000000000000..ac75f7d0cb29
--- /dev/null
+++ b/libunwind/test/frameheadercache_test.pass.cpp
@@ -0,0 +1,82 @@
+// The other libunwind tests don't test internal interfaces, so the include path
+// is a little wonky.
+#include "../src/config.h"
+
+// Only run this test under supported configurations.
+// This #if chain is ugly, but see the comments in AddressSpace.hpp for
+// the reasoning.
+
+#ifdef __APPLE__
+int main() { return 0; }
+#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
+int main() { return 0; }
+#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
+int main() { return 0; }
+#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32)
+int main() { return 0; }
+#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
+int main() { return 0; }
+#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__)
+int main() { return 0; }
+#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
+
+#include <link.h>
+#include <stdio.h>
+
+// This file defines several of the data structures needed here,
+// and includes FrameHeaderCache.hpp as well.
+#include "../src/AddressSpace.hpp"
+
+#define kBaseAddr 0xFFF000
+#define kDwarfSectionLength 0xFF
+
+using namespace libunwind;
+
+int main() {
+  FrameHeaderCache FHC;
+  struct dl_phdr_info PInfo;
+  memset(&PInfo, 0, sizeof(PInfo));
+  // The cache itself should only care about these two fields--they
+  // tell the cache to invalidate or not; everything else is handled
+  // by AddressSpace.hpp.
+  PInfo.dlpi_adds = 6;
+  PInfo.dlpi_subs = 7;
+
+  UnwindInfoSections UIS;
+  UIS.dso_base = kBaseAddr;
+  UIS.dwarf_section_length = kDwarfSectionLength;
+  dl_iterate_cb_data CBData;
+  // Unused by the cache.
+  CBData.addressSpace = nullptr;
+  CBData.sects = &UIS;
+  CBData.targetAddr = kBaseAddr + 1;
+
+  // Nothing present, shouldn't find.
+  if (FHC.find(&PInfo, 0, &CBData))
+    abort();
+  FHC.add(&UIS);
+  // Just added. Should find.
+  if (!FHC.find(&PInfo, 0, &CBData))
+    abort();
+  // Cache is invalid. Shouldn't find.
+  PInfo.dlpi_adds++;
+  if (FHC.find(&PInfo, 0, &CBData))
+    abort();
+
+  FHC.add(&UIS);
+  CBData.targetAddr = kBaseAddr - 1;
+  // Shouldn't find something outside of the addresses.
+  if (FHC.find(&PInfo, 0, &CBData))
+    abort();
+  // Add enough things to the cache that the entry is evicted.
+  for (int i = 0; i < 9; i++) {
+    UIS.dso_base = kBaseAddr + (kDwarfSectionLength * i);
+    FHC.add(&UIS);
+  }
+  CBData.targetAddr = kBaseAddr;
+  // Should have been evicted.
+  if (FHC.find(&PInfo, 0, &CBData))
+    abort();
+  return 0;
+}
+#endif


        


More information about the cfe-commits mailing list