Cache uwnind frame headers as they are found.
Summary:
This improves unwind performance quite substantially, and follows
a somewhat similar approach used in libgcc_s as described in the
thread here:
https://gcc.gnu.org/ml/gcc/2005-02/msg00625.html
On certain extremely exception heavy internal tests, the time
drops from about 80 minutes to about five minutes.
Subscribers: libcxx-commits
Tags: #libc
Differential Revision: https://reviews.llvm.org/D75954
Cr-Mirrored-From: https://chromium.googlesource.com/external/github.com/llvm/llvm-project
Cr-Mirrored-Commit: c53c2058ffb8ff877702bb2dded31c85c1dfe66d
diff --git a/src/FrameHeaderCache.hpp b/src/FrameHeaderCache.hpp
new file mode 100644
index 0000000..813fcd4
--- /dev/null
+++ b/src/FrameHeaderCache.hpp
@@ -0,0 +1,149 @@
+//===-FrameHeaderCache.hpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Cache the elf program headers necessary to unwind the stack more efficiently
+// in the presence of many dsos.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __FRAMEHEADER_CACHE_HPP__
+#define __FRAMEHEADER_CACHE_HPP__
+
+#include "config.h"
+#include <limits.h>
+
+#ifdef _LIBUNWIND_DEBUG_FRAMEHEADER_CACHE
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x) _LIBUNWIND_LOG0(x)
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...) \
+ _LIBUNWIND_LOG(msg, __VA_ARGS__)
+#else
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE0(x)
+#define _LIBUNWIND_FRAMEHEADERCACHE_TRACE(msg, ...)
+#endif
+
+// This cache should only be be used from within a dl_iterate_phdr callback.
+// dl_iterate_phdr does the necessary synchronization to prevent problems
+// with concurrent access via the libc load lock. Adding synchronization
+// for other uses is possible, but not currently done.
+
+class _LIBUNWIND_HIDDEN FrameHeaderCache {
+ struct CacheEntry {
+ uintptr_t LowPC() { return Info.dso_base; };
+ uintptr_t HighPC() { return Info.dso_base + Info.dwarf_section_length; };
+ UnwindInfoSections Info;
+ CacheEntry *Next;
+ };
+
+ static const size_t kCacheEntryCount = 8;
+
+ // Can't depend on the C++ standard library in libunwind, so use an array to
+ // allocate the entries, and two linked lists for ordering unused and recently
+ // used entries. FIXME: Would the the extra memory for a doubly-linked list
+ // be better than the runtime cost of traversing a very short singly-linked
+ // list on a cache miss? The entries themselves are all small and consecutive,
+ // so unlikely to cause page faults when following the pointers. The memory
+ // spent on additional pointers could also be spent on more entries.
+
+ CacheEntry Entries[kCacheEntryCount];
+ CacheEntry *MostRecentlyUsed;
+ CacheEntry *Unused;
+
+ void resetCache() {
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE0("FrameHeaderCache reset");
+ MostRecentlyUsed = nullptr;
+ Unused = &Entries[0];
+ for (size_t i = 0; i < kCacheEntryCount - 1; i++) {
+ Entries[i].Next = &Entries[i + 1];
+ }
+ Entries[kCacheEntryCount - 1].Next = nullptr;
+ }
+
+ bool cacheNeedsReset(dl_phdr_info *PInfo) {
+ // C libraries increment dl_phdr_info.adds and dl_phdr_info.subs when
+ // loading and unloading shared libraries. If these values change between
+ // iterations of dl_iterate_phdr, then invalidate the cache.
+
+ // These are static to avoid needing an initializer, and unsigned long long
+ // because that is their type within the extended dl_phdr_info. Initialize
+ // these to something extremely unlikely to be found upon the first call to
+ // dl_iterate_phdr.
+ static unsigned long long LastAdds = ULLONG_MAX;
+ static unsigned long long LastSubs = ULLONG_MAX;
+ if (PInfo->dlpi_adds != LastAdds || PInfo->dlpi_subs != LastSubs) {
+ // Resetting the entire cache is a big hammer, but this path is rare--
+ // usually just on the very first call, when the cache is empty anyway--so
+ // added complexity doesn't buy much.
+ LastAdds = PInfo->dlpi_adds;
+ LastSubs = PInfo->dlpi_subs;
+ resetCache();
+ return true;
+ }
+ return false;
+ }
+
+public:
+ bool find(dl_phdr_info *PInfo, size_t, void *data) {
+ if (cacheNeedsReset(PInfo) || MostRecentlyUsed == nullptr)
+ return false;
+
+ auto *CBData = static_cast<dl_iterate_cb_data *>(data);
+ CacheEntry *Current = MostRecentlyUsed;
+ CacheEntry *Previous = nullptr;
+ while (Current != nullptr) {
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
+ "FrameHeaderCache check %lx in [%lx - %lx)", CBData->targetAddr,
+ Current->LowPC(), Current->HighPC());
+ if (Current->LowPC() <= CBData->targetAddr &&
+ CBData->targetAddr < Current->HighPC()) {
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE(
+ "FrameHeaderCache hit %lx in [%lx - %lx)", CBData->targetAddr,
+ Current->LowPC(), Current->HighPC());
+ if (Previous) {
+ // If there is no Previous, then Current is already the
+ // MostRecentlyUsed, and no need to move it up.
+ Previous->Next = Current->Next;
+ Current->Next = MostRecentlyUsed;
+ MostRecentlyUsed = Current;
+ }
+ *CBData->sects = Current->Info;
+ return true;
+ }
+ Previous = Current;
+ Current = Current->Next;
+ }
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache miss for address %lx",
+ CBData->targetAddr);
+ return false;
+ }
+
+ void add(const UnwindInfoSections *UIS) {
+ CacheEntry *Current = nullptr;
+
+ if (Unused != nullptr) {
+ Current = Unused;
+ Unused = Unused->Next;
+ } else {
+ Current = MostRecentlyUsed;
+ CacheEntry *Previous = nullptr;
+ while (Current->Next != nullptr) {
+ Previous = Current;
+ Current = Current->Next;
+ }
+ Previous->Next = nullptr;
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache evict [%lx - %lx)",
+ Current->LowPC(), Current->HighPC());
+ }
+
+ Current->Info = *UIS;
+ Current->Next = MostRecentlyUsed;
+ MostRecentlyUsed = Current;
+ _LIBUNWIND_FRAMEHEADERCACHE_TRACE("FrameHeaderCache add [%lx - %lx)",
+ MostRecentlyUsed->LowPC(),
+ MostRecentlyUsed->HighPC());
+ }
+};
+
+#endif // __FRAMEHEADER_CACHE_HPP__