[libc-commits] [compiler-rt] [libc] [scudo] Added LRU eviction policy to secondary cache. (PR #99409)

Joshua Baehring via libc-commits libc-commits at lists.llvm.org
Mon Jul 22 22:54:35 PDT 2024


https://github.com/JoshuaMBa updated https://github.com/llvm/llvm-project/pull/99409

From 1412d86e5f72c2bc9dda9d4212d371951cfdf0e0 Mon Sep 17 00:00:00 2001
From: RoseZhang03 <rosezhang at google.com>
Date: Wed, 17 Jul 2024 23:34:53 +0000
Subject: [PATCH] [libc] final edits to newheadergen yaml files (#98983)

- final run of integration tests to deal with incorrect YAML input
  (sys headers finished; the rest will be finished today)
- add any new functions introduced in recent PRs
---
 compiler-rt/lib/scudo/standalone/secondary.h  | 187 +++++++++++++-----
 libc/config/linux/x86_64/headers.txt          |   1 +
 libc/newhdrgen/yaml/{ => arpa}/arpa_inet.yaml |   5 +-
 libc/newhdrgen/yaml/assert.yaml               |   1 -
 .../yaml/{rpc.yaml => gpu/gpu_rpc.yaml}       |   0
 libc/newhdrgen/yaml/math.yaml                 |   6 +
 libc/newhdrgen/yaml/pthread.yaml              |   7 +-
 libc/newhdrgen/yaml/search.yaml               |   1 -
 libc/newhdrgen/yaml/sys/sys_time.yaml         |   3 +-
 libc/newhdrgen/yaml/wchar.yaml                |   1 +
 10 files changed, 155 insertions(+), 57 deletions(-)
 rename libc/newhdrgen/yaml/{ => arpa}/arpa_inet.yaml (86%)
 rename libc/newhdrgen/yaml/{rpc.yaml => gpu/gpu_rpc.yaml} (100%)

diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 9a8e53be388b7..b8e12db934963 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -19,6 +19,7 @@
 #include "stats.h"
 #include "string_utils.h"
 #include "thread_annotations.h"
+#include "vector.h"
 
 namespace scudo {
 
@@ -73,12 +74,18 @@ static inline void unmap(LargeBlock::Header *H) {
 }
 
 namespace {
+
 struct CachedBlock {
+  static constexpr u16 CacheIndexMax = UINT16_MAX;
+  static constexpr u16 InvalidEntry = CacheIndexMax;
+
   uptr CommitBase = 0;
   uptr CommitSize = 0;
   uptr BlockBegin = 0;
   MemMapT MemMap = {};
   u64 Time = 0;
+  u16 Next = 0;
+  u16 Prev = 0;
 
   bool isValid() { return CommitBase != 0; }
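
For readers following along: the patch replaces the flat, validity-scanned entries array with a doubly linked LRU list threaded through the fixed array by u16 indices (the Next/Prev fields above), plus a stack of free slots. Below is a minimal standalone sketch of that layout; Cache, Node, pushFront, ARRAY_SIZE, and INVALID are illustrative names for this email only, not scudo identifiers.

// A minimal, standalone sketch of an array-backed LRU list.
#include <cassert>
#include <cstdint>

constexpr uint16_t ARRAY_SIZE = 32;      // stands in for getEntriesArraySize()
constexpr uint16_t INVALID = UINT16_MAX; // mirrors CachedBlock::InvalidEntry

struct Node {
  uint64_t Payload = 0; // stands in for the cached block's metadata
  uint16_t Next = INVALID;
  uint16_t Prev = INVALID;
};

struct Cache {
  Node Entries[ARRAY_SIZE];
  uint16_t Head = INVALID;  // most recently used entry
  uint16_t Tail = INVALID;  // least recently used entry
  uint16_t FreeHead = 0;    // top of the stack of unused slots
  uint16_t Count = 0;

  Cache() {
    // Chain every slot into the free stack, as init() does in the patch.
    for (uint16_t I = 0; I + 1 < ARRAY_SIZE; I++)
      Entries[I].Next = static_cast<uint16_t>(I + 1);
    Entries[ARRAY_SIZE - 1].Next = INVALID;
  }

  void pushFront(uint64_t Payload) {
    assert(FreeHead != INVALID && "caller must evict before inserting");
    uint16_t I = FreeHead;      // pop a slot off the free stack
    FreeHead = Entries[I].Next;
    Entries[I].Payload = Payload;
    Entries[I].Next = Head;     // splice in at the MRU end
    Entries[I].Prev = INVALID;
    if (Head != INVALID)
      Entries[Head].Prev = I;
    else
      Tail = I;                 // list was empty; I is also the tail
    Head = I;
    Count++;
  }
};
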
 
@@ -188,10 +195,11 @@ template <typename Config> class MapAllocatorCache {
     Str->append("Stats: CacheRetrievalStats: SuccessRate: %u/%u "
                 "(%zu.%02zu%%)\n",
                 SuccessfulRetrieves, CallsToRetrieve, Integral, Fractional);
-    for (CachedBlock Entry : Entries) {
-      if (!Entry.isValid())
-        continue;
-      Str->append("StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, "
+    Str->append("Cache Entry Info (Most Recent -> Least Recent):\n");
+
+    for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) {
+      CachedBlock &Entry = Entries[I];
+      Str->append("  StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, "
                   "BlockSize: %zu %s\n",
                   Entry.CommitBase, Entry.CommitBase + Entry.CommitSize,
                   Entry.CommitSize, Entry.Time == 0 ? "[R]" : "");
@@ -202,6 +210,10 @@ template <typename Config> class MapAllocatorCache {
   static_assert(Config::getDefaultMaxEntriesCount() <=
                     Config::getEntriesArraySize(),
                 "");
+  // Ensure the cache entry array size fits in the LRU list's Next and Prev
+  // index fields.
+  static_assert(Config::getEntriesArraySize() <= CachedBlock::CacheIndexMax,
+                "Cache entry array is too large to be indexed.");
 
   void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS {
     DCHECK_EQ(EntriesCount, 0U);
@@ -213,23 +225,33 @@ template <typename Config> class MapAllocatorCache {
     if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN)
       ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs();
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
+
+    // The cache is initially empty
+    LRUHead = CachedBlock::InvalidEntry;
+    LRUTail = CachedBlock::InvalidEntry;
+
+    // Available entries will be retrieved starting from the beginning of the
+    // Entries array
+    AvailableHead = 0;
+    for (u32 I = 0; I < Config::getEntriesArraySize() - 1; I++)
+      Entries[I].Next = static_cast<u16>(I + 1);
+
+    Entries[Config::getEntriesArraySize() - 1].Next = CachedBlock::InvalidEntry;
   }
 
   void store(const Options &Options, LargeBlock::Header *H) EXCLUDES(Mutex) {
     if (!canCache(H->CommitSize))
       return unmap(H);
 
-    bool EntryCached = false;
-    bool EmptyCache = false;
     const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs);
-    const u64 Time = getMonotonicTimeFast();
-    const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
+    u64 Time;
     CachedBlock Entry;
+
     Entry.CommitBase = H->CommitBase;
     Entry.CommitSize = H->CommitSize;
     Entry.BlockBegin = reinterpret_cast<uptr>(H + 1);
     Entry.MemMap = H->MemMap;
-    Entry.Time = Time;
+    Entry.Time = UINT64_MAX;
     if (useMemoryTagging<Config>(Options)) {
       if (Interval == 0 && !SCUDO_FUCHSIA) {
         // Release the memory and make it inaccessible at the same time by
@@ -243,17 +265,32 @@ template <typename Config> class MapAllocatorCache {
         Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize,
                                          MAP_NOACCESS);
       }
-    } else if (Interval == 0) {
-      Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize);
-      Entry.Time = 0;
     }
+
+    // Usually only one entry will be evicted from the cache.
+    // Only in the rare event that the cache shrinks at run time,
+    // due to a decrease in the configurable value MaxEntriesCount,
+    // will more than one cache entry be evicted.
+    // The vector saves the MemMaps of evicted entries so that the
+    // unmap calls can be performed outside the lock.
+    Vector<MemMapT, 1U> EvictionMemMaps;
+
     do {
       ScopedLock L(Mutex);
+
+      // Time must be computed under the lock to ensure
+      // that the LRU cache remains sorted with respect to
+      // time in a multithreaded environment
+      Time = getMonotonicTimeFast();
+      if (Entry.Time != 0)
+        Entry.Time = Time;
+
       if (useMemoryTagging<Config>(Options) && QuarantinePos == -1U) {
         // If we get here then memory tagging was disabled in between when we
         // read Options and when we locked Mutex. We can't insert our entry into
         // the quarantine or the cache because the permissions would be wrong so
         // just unmap it.
+        Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity());
         break;
       }
       if (Config::getQuarantineSize() && useMemoryTagging<Config>(Options)) {
@@ -269,30 +306,27 @@ template <typename Config> class MapAllocatorCache {
           OldestTime = Entry.Time;
         Entry = PrevEntry;
       }
-      if (EntriesCount >= MaxCount) {
-        if (IsFullEvents++ == 4U)
-          EmptyCache = true;
-      } else {
-        for (u32 I = 0; I < MaxCount; I++) {
-          if (Entries[I].isValid())
-            continue;
-          if (I != 0)
-            Entries[I] = Entries[0];
-          Entries[0] = Entry;
-          EntriesCount++;
-          if (OldestTime == 0)
-            OldestTime = Entry.Time;
-          EntryCached = true;
-          break;
-        }
+
+      // All excess entries are evicted from the cache
+      while (needToEvict()) {
+        // Save MemMaps of evicted entries to perform unmap outside of lock
+        EvictionMemMaps.push_back(Entries[LRUTail].MemMap);
+        remove(LRUTail);
       }
+
+      insert(Entry);
+
+      if (OldestTime == 0)
+        OldestTime = Entry.Time;
     } while (0);
-    if (EmptyCache)
-      empty();
-    else if (Interval >= 0)
+
+    for (MemMapT &EvictMemMap : EvictionMemMaps)
+      EvictMemMap.unmap(EvictMemMap.getBase(), EvictMemMap.getCapacity());
+
+    if (Interval >= 0) {
+      // TODO: Add ReleaseToOS logic to LRU algorithm
       releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000);
-    if (!EntryCached)
-      Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity());
+    }
   }
 
   bool retrieve(Options Options, uptr Size, uptr Alignment, uptr HeadersSize,
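
The rewritten store() above batches evictions so that the syscall-heavy unmap work happens after Mutex is released; normally only one entry is evicted, but a shrinking MaxEntriesCount can evict several at once. A hedged sketch of that deferred-unmap pattern, using std::mutex and std::vector as stand-ins for scudo's own lock and Vector, with an illustrative MemMap type:

// Sketch: mutate the cache under the lock, but defer the unmap calls
// until after the lock is released so other threads are not blocked
// behind syscalls. All names here are illustrative.
#include <cstddef>
#include <mutex>
#include <vector>

struct MemMap {
  void unmap() {} // would release the underlying mapping in real code
};

std::mutex CacheMutex;

void storeSketch(MemMap Incoming, std::vector<MemMap> &CacheSlots,
                 std::size_t MaxEntries) {
  std::vector<MemMap> Evicted; // usually at most one element
  {
    std::lock_guard<std::mutex> L(CacheMutex);
    // Evict until there is room; more than one eviction happens only
    // if MaxEntries shrank since the previous store.
    while (!CacheSlots.empty() && CacheSlots.size() >= MaxEntries) {
      Evicted.push_back(CacheSlots.back()); // LRU end in this sketch
      CacheSlots.pop_back();
    }
    CacheSlots.push_back(Incoming);
  } // lock released here
  for (MemMap &M : Evicted)
    M.unmap(); // expensive work, now outside the critical section
}
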
@@ -312,9 +346,8 @@ template <typename Config> class MapAllocatorCache {
         return false;
       u32 OptimalFitIndex = 0;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = 0; I < MaxCount; I++) {
-        if (!Entries[I].isValid())
-          continue;
+      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+           I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
@@ -347,8 +380,7 @@ template <typename Config> class MapAllocatorCache {
       }
       if (Found) {
         Entry = Entries[OptimalFitIndex];
-        Entries[OptimalFitIndex].invalidate();
-        EntriesCount--;
+        remove(OptimalFitIndex);
         SuccessfulRetrieves++;
       }
     }
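
retrieve() now walks only the populated LRU list rather than probing every array slot, while keeping the best-fit selection via MinDiff. A simplified sketch of that scan, reusing the illustrative Cache/Node layout from the earlier sketch (Payload stands in for CommitSize):

uint16_t findBestFit(const Cache &C, uint64_t WantedSize) {
  uint16_t BestIndex = INVALID;
  uint64_t MinDiff = UINT64_MAX; // smallest wasted space seen so far
  for (uint16_t I = C.Head; I != INVALID; I = C.Entries[I].Next) {
    uint64_t HaveSize = C.Entries[I].Payload; // stands in for CommitSize
    if (HaveSize < WantedSize)
      continue; // too small for the request
    uint64_t Diff = HaveSize - WantedSize;
    if (Diff < MinDiff) {
      MinDiff = Diff;
      BestIndex = I;
      if (Diff == 0)
        break; // exact fit; no better match possible
    }
  }
  return BestIndex; // INVALID when nothing fits
}
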
@@ -418,11 +450,9 @@ template <typename Config> class MapAllocatorCache {
       }
     }
     const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
-    for (u32 I = 0; I < MaxCount; I++) {
-      if (Entries[I].isValid()) {
-        Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase,
-                                              Entries[I].CommitSize, 0);
-      }
+    for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) {
+      Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase,
+                                            Entries[I].CommitSize, 0);
     }
     QuarantinePos = -1U;
   }
@@ -434,6 +464,66 @@ template <typename Config> class MapAllocatorCache {
   void unmapTestOnly() { empty(); }
 
 private:
+  bool needToEvict() REQUIRES(Mutex) {
+    return (EntriesCount >= atomic_load_relaxed(&MaxEntriesCount));
+  }
+
+  void insert(const CachedBlock &Entry) REQUIRES(Mutex) {
+    DCHECK_LT(EntriesCount, atomic_load_relaxed(&MaxEntriesCount));
+
+    // A free slot must be available here, since
+    // EntriesCount < MaxEntriesCount <= the entries array size.
+    DCHECK_NE(AvailableHead, CachedBlock::InvalidEntry);
+
+    u32 FreeIndex = AvailableHead;
+    AvailableHead = Entries[AvailableHead].Next;
+
+    if (EntriesCount == 0) {
+      LRUTail = static_cast<u16>(FreeIndex);
+    } else {
+      // Check list order: head must be at least as recent as its successor.
+      if (EntriesCount > 1)
+        DCHECK_GE(Entries[LRUHead].Time, Entries[Entries[LRUHead].Next].Time);
+      Entries[LRUHead].Prev = static_cast<u16>(FreeIndex);
+    }
+
+    Entries[FreeIndex] = Entry;
+    Entries[FreeIndex].Next = LRUHead;
+    Entries[FreeIndex].Prev = CachedBlock::InvalidEntry;
+    LRUHead = static_cast<u16>(FreeIndex);
+    EntriesCount++;
+
+    // The availability stack must be empty when all cache entries
+    // are in use.
+    if (EntriesCount == Config::getEntriesArraySize())
+      DCHECK_EQ(AvailableHead, CachedBlock::InvalidEntry);
+  }
+
+  void remove(uptr I) REQUIRES(Mutex) {
+    DCHECK(Entries[I].isValid());
+
+    Entries[I].invalidate();
+
+    if (I == LRUHead)
+      LRUHead = Entries[I].Next;
+    else
+      Entries[Entries[I].Prev].Next = Entries[I].Next;
+
+    if (I == LRUTail)
+      LRUTail = Entries[I].Prev;
+    else
+      Entries[Entries[I].Next].Prev = Entries[I].Prev;
+
+    Entries[I].Next = AvailableHead;
+    AvailableHead = static_cast<u16>(I);
+    EntriesCount--;
+
+    // The LRU list must be empty when the cache holds no entries.
+    if (EntriesCount == 0) {
+      DCHECK_EQ(LRUHead, CachedBlock::InvalidEntry);
+      DCHECK_EQ(LRUTail, CachedBlock::InvalidEntry);
+    }
+  }
+
   void empty() {
     MemMapT MapInfo[Config::getEntriesArraySize()];
     uptr N = 0;
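
remove() above is the inverse of insert(): unlink the slot from the doubly linked list, patching head/tail when the slot sits at either end, then push the slot onto the availability stack. Continuing the same illustrative sketch:

void removeAt(Cache &C, uint16_t I) {
  // Detach slot I from the doubly linked list, fixing up Head/Tail when
  // the slot is at either end, just as remove() does above.
  if (I == C.Head)
    C.Head = C.Entries[I].Next;
  else
    C.Entries[C.Entries[I].Prev].Next = C.Entries[I].Next;

  if (I == C.Tail)
    C.Tail = C.Entries[I].Prev;
  else
    C.Entries[C.Entries[I].Next].Prev = C.Entries[I].Prev;

  // Return the freed slot to the availability stack.
  C.Entries[I].Next = C.FreeHead;
  C.FreeHead = I;
  C.Count--;
}
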
@@ -443,11 +533,10 @@ template <typename Config> class MapAllocatorCache {
         if (!Entries[I].isValid())
           continue;
         MapInfo[N] = Entries[I].MemMap;
-        Entries[I].invalidate();
+        remove(I);
         N++;
       }
       EntriesCount = 0;
-      IsFullEvents = 0;
     }
     for (uptr I = 0; I < N; I++) {
       MemMapT &MemMap = MapInfo[I];
@@ -484,7 +573,6 @@ template <typename Config> class MapAllocatorCache {
   atomic_u32 MaxEntriesCount = {};
   atomic_uptr MaxEntrySize = {};
   u64 OldestTime GUARDED_BY(Mutex) = 0;
-  u32 IsFullEvents GUARDED_BY(Mutex) = 0;
   atomic_s32 ReleaseToOsIntervalMs = {};
   u32 CallsToRetrieve GUARDED_BY(Mutex) = 0;
   u32 SuccessfulRetrieves GUARDED_BY(Mutex) = 0;
@@ -492,6 +580,13 @@ template <typename Config> class MapAllocatorCache {
   CachedBlock Entries[Config::getEntriesArraySize()] GUARDED_BY(Mutex) = {};
   NonZeroLengthArray<CachedBlock, Config::getQuarantineSize()>
       Quarantine GUARDED_BY(Mutex) = {};
+
+  // LRUHead is the most recently used cache entry.
+  u16 LRUHead GUARDED_BY(Mutex) = 0;
+  // LRUTail is the least recently used cache entry.
+  u16 LRUTail GUARDED_BY(Mutex) = 0;
+  // AvailableHead is the top of the stack of available (unused) entries.
+  u16 AvailableHead GUARDED_BY(Mutex) = 0;
 };
 
 template <typename Config> class MapAllocator {
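
Tying the sketches together, a short usage example (arbitrary sizes) showing the list evolving in MRU-to-LRU order:

// Usage example for the sketches above; values are arbitrary.
#include <cstdio>

int main() {
  Cache C;
  C.pushFront(64);  // list (MRU -> LRU): 64
  C.pushFront(128); // list: 128 -> 64
  C.pushFront(32);  // list: 32 -> 128 -> 64

  uint16_t I = findBestFit(C, 100); // only the 128-byte slot fits
  if (I != INVALID) {
    std::printf("best fit: slot %u holds %llu\n", (unsigned)I,
                (unsigned long long)C.Entries[I].Payload);
    removeAt(C, I); // list: 32 -> 64; the slot returns to the free stack
  }
  return 0;
}
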
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index df276894246c4..0294f62bc2f7a 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -45,6 +45,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.sys_select
     libc.include.sys_socket
     libc.include.sys_stat
+    libc.include.sys_statvfs
     libc.include.sys_syscall
     libc.include.sys_time
     libc.include.sys_types
diff --git a/libc/newhdrgen/yaml/arpa_inet.yaml b/libc/newhdrgen/yaml/arpa/arpa_inet.yaml
similarity index 86%
rename from libc/newhdrgen/yaml/arpa_inet.yaml
rename to libc/newhdrgen/yaml/arpa/arpa_inet.yaml
index 945a602705dba..c01235d4327a5 100644
--- a/libc/newhdrgen/yaml/arpa_inet.yaml
+++ b/libc/newhdrgen/yaml/arpa/arpa_inet.yaml
@@ -1,9 +1,6 @@
 header: arpa-inet.h
 macros: []
-types:
-  - type_name: uint32_t
-  - type_name: uint16_t
-  - type_name: inttypes.h
+types: []
 enums: []
 objects: []
 functions:
diff --git a/libc/newhdrgen/yaml/assert.yaml b/libc/newhdrgen/yaml/assert.yaml
index 9ad0f0628274e..58d6c413cebdc 100644
--- a/libc/newhdrgen/yaml/assert.yaml
+++ b/libc/newhdrgen/yaml/assert.yaml
@@ -13,4 +13,3 @@ functions:
       - type: const char *
       - type: unsigned
       - type: const char *
-    guard: __cplusplus
diff --git a/libc/newhdrgen/yaml/rpc.yaml b/libc/newhdrgen/yaml/gpu/gpu_rpc.yaml
similarity index 100%
rename from libc/newhdrgen/yaml/rpc.yaml
rename to libc/newhdrgen/yaml/gpu/gpu_rpc.yaml
diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
index 5afde59b6b558..8588389bca4d2 100644
--- a/libc/newhdrgen/yaml/math.yaml
+++ b/libc/newhdrgen/yaml/math.yaml
@@ -7,6 +7,12 @@ types:
 enums: []
 objects: []
 functions:
+  - name: cbrt
+    standards:
+      - stdc
+    return_type: double
+    arguments:
+      - type: double
   - name: cbrtf
     standards:
       - stdc
diff --git a/libc/newhdrgen/yaml/pthread.yaml b/libc/newhdrgen/yaml/pthread.yaml
index f22767eb1b752..292d91751e406 100644
--- a/libc/newhdrgen/yaml/pthread.yaml
+++ b/libc/newhdrgen/yaml/pthread.yaml
@@ -8,12 +8,12 @@ types:
   - type_name: pthread_key_t
   - type_name: pthread_condattr_t
   - type_name: __pthread_tss_dtor_t
+  - type_name: pthread_rwlock_t
   - type_name: pthread_rwlockattr_t
   - type_name: pthread_attr_t
   - type_name: __pthread_start_t
   - type_name: __pthread_once_func_t
   - type_name: __atfork_callback_t
-  - type_name: pthread_rwlock_t
 enums: []
 functions:
   - name: pthread_atfork
@@ -106,7 +106,7 @@ functions:
     return_type: int
     arguments:
       - type: const pthread_condattr_t *__restrict
-      - type: clockid_t * __restrict
+      - type: clockid_t *__restrict
   - name: pthread_condattr_getpshared
     standards: 
       - POSIX
@@ -200,7 +200,8 @@ functions:
     standards: 
       - POSIX
     return_type: pthread_t
-    arguments: []
+    arguments:
+      - type: void
   - name: pthread_setname_np
     standards:
       - GNUExtensions
diff --git a/libc/newhdrgen/yaml/search.yaml b/libc/newhdrgen/yaml/search.yaml
index a7983a70bda73..b4fde14f771a2 100644
--- a/libc/newhdrgen/yaml/search.yaml
+++ b/libc/newhdrgen/yaml/search.yaml
@@ -1,7 +1,6 @@
 header: search.h
 macros: []
 types:
-  - type_name: size_t
   - type_name: struct_hsearch_data
   - type_name: ENTRY
   - type_name: ACTION
diff --git a/libc/newhdrgen/yaml/sys/sys_time.yaml b/libc/newhdrgen/yaml/sys/sys_time.yaml
index a901cdafd26a1..eb3dd548389b3 100644
--- a/libc/newhdrgen/yaml/sys/sys_time.yaml
+++ b/libc/newhdrgen/yaml/sys/sys_time.yaml
@@ -1,8 +1,7 @@
 header: sys-time.h
 standards: Linux
 macros: []
-types:
-  - type_name: struct_timeval
+types: []
 enums: []
 functions: []
 objects: []
diff --git a/libc/newhdrgen/yaml/wchar.yaml b/libc/newhdrgen/yaml/wchar.yaml
index 663267fb69d73..92ecdc26fbc73 100644
--- a/libc/newhdrgen/yaml/wchar.yaml
+++ b/libc/newhdrgen/yaml/wchar.yaml
@@ -4,6 +4,7 @@ types:
   - type_name: size_t
   - type_name: wint_t
   - type_name: wchar_t
+  - type_name: mbstate_t
 enums: []
 objects: []
 functions:



More information about the libc-commits mailing list