[compiler-rt] Fix hooks on realloc (PR #73882)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 16:57:20 PST 2023


https://github.com/ChiaHungDuan created https://github.com/llvm/llvm-project/pull/73882

 `realloc` may involve both an allocation and a deallocation. Given that
 reporting the two events is not atomic, the hook user may be led to the
 false conclusion that a double-use pattern has happened. In general, this
 can be resolved on the hook side. To ease that task, we add two new hooks
 to mark the range of `realloc` and reorder the reporting.
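
 For illustration, here is a minimal hook-side sketch of how a consumer
 could use the new pair of hooks. The thread-local flag and the
 printf-based reporting are hypothetical; only the `__scudo_*` hook
 signatures come from this patch.

   #include <cstddef>
   #include <cstdio>

   // Set while the current thread is inside realloc(); the allocation and
   // deallocation events reported in between belong to one logical
   // realloc() and can be grouped instead of being flagged as double-use.
   static thread_local void *ReallocOldPtr = nullptr;

   extern "C" {
   void __scudo_realloc_begin_hook(void *old_ptr) { ReallocOldPtr = old_ptr; }
   void __scudo_realloc_end_hook(void *old_ptr) {
     (void)old_ptr;
     ReallocOldPtr = nullptr;
   }
   void __scudo_allocate_hook(void *ptr, size_t size) {
     std::printf("alloc %p (%zu bytes)%s\n", ptr, size,
                 ReallocOldPtr ? " [within realloc]" : "");
   }
   void __scudo_deallocate_hook(void *ptr) {
     std::printf("free %p%s\n", ptr,
                 ReallocOldPtr ? " [within realloc]" : "");
   }
   } // extern "C"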


From 7316b7b87b35b04aa20e02569ca1c4e4922b2c0b Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 14 Nov 2023 07:03:47 +0000
Subject: [PATCH 1/3] [scudo] Add ScopedTSD to avoid releasing TSD manually

This makes the use of TSDs RAII-style and avoids exposing the concrete
TSD type.

Also move some thread-safety checks from static analysis to runtime
because of the limitations of the static analysis. Even though some code
paths are marked NO_THREAD_SAFETY_ANALYSIS, `assertLocked()` still covers
their correctness at runtime.
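
Schematically, the change replaces the manual lock/unlock pattern with an
RAII guard (simplified from the combined.h hunk below):

  // Before: the caller tracks whether the TSD must be unlocked.
  bool UnlockRequired;
  auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
  Block = TSD->getCache().allocate(ClassId);
  if (UnlockRequired)
    TSD->unlock();

  // After: the guard asserts the lock on access and unlocks (when needed)
  // in its destructor.
  typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
  Block = TSD->getCache().allocate(ClassId);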
---
 compiler-rt/lib/scudo/standalone/combined.h   | 24 +++-----
 .../scudo/standalone/tests/combined_test.cpp  | 38 +++++++-----
 .../lib/scudo/standalone/tests/tsd_test.cpp   | 55 ++++++++----------
 .../lib/scudo/standalone/tsd_exclusive.h      | 54 +++++++++++------
 compiler-rt/lib/scudo/standalone/tsd_shared.h | 58 ++++++++++++-------
 5 files changed, 131 insertions(+), 98 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index b1700e5ecef7f5b..02cd5e80e873665 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -363,9 +363,7 @@ class Allocator {
     if (LIKELY(PrimaryT::canAllocate(NeededSize))) {
       ClassId = SizeClassMap::getClassIdBySize(NeededSize);
       DCHECK_NE(ClassId, 0U);
-      bool UnlockRequired;
-      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+      typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
       Block = TSD->getCache().allocate(ClassId);
       // If the allocation failed, retry in each successively larger class until
       // it fits. If it fails to fit in the largest class, fallback to the
@@ -376,8 +374,6 @@ class Allocator {
         if (!Block)
           ClassId = 0;
       }
-      if (UnlockRequired)
-        TSD->unlock();
     }
     if (UNLIKELY(ClassId == 0)) {
       Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd,
@@ -1148,13 +1144,11 @@ class Allocator {
       void *BlockBegin = getBlockBegin(Ptr, Header);
       const uptr ClassId = Header->ClassId;
       if (LIKELY(ClassId)) {
-        bool UnlockRequired;
-        auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-        TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-        const bool CacheDrained =
-            TSD->getCache().deallocate(ClassId, BlockBegin);
-        if (UnlockRequired)
-          TSD->unlock();
+        bool CacheDrained;
+        {
+          typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
+          CacheDrained = TSD->getCache().deallocate(ClassId, BlockBegin);
+        }
         // When we have drained some blocks back to the Primary from TSD, that
         // implies that we may have the chance to release some pages as well.
         // Note that in order not to block other thread's accessing the TSD,
@@ -1168,13 +1162,9 @@ class Allocator {
         Secondary.deallocate(Options, BlockBegin);
       }
     } else {
-      bool UnlockRequired;
-      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+      typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
       Quarantine.put(&TSD->getQuarantineCache(),
                      QuarantineCallback(*this, TSD->getCache()), Ptr, Size);
-      if (UnlockRequired)
-        TSD->unlock();
     }
   }
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 3dbd93cacefd684..a29463a0344739a 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -229,11 +229,25 @@ struct TestConditionVariableConfig {
 #define SCUDO_TYPED_TEST(FIXTURE, NAME)                                        \
   template <class TypeParam>                                                   \
   struct FIXTURE##NAME : public FIXTURE<TypeParam> {                           \
+    using BaseT = FIXTURE<TypeParam>;                                          \
     void Run();                                                                \
   };                                                                           \
   SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                                    \
   template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run()
 
+// Accessing `TSD->getCache()` requires `TSD::Mutex`, which is hard to verify
+// with static thread-safety analysis. Instead, we verify the thread safety
+// through the runtime checks in ScopedTSD and mark the test body with
+// NO_THREAD_SAFETY_ANALYSIS.
+#define SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(FIXTURE, NAME)                     \
+  template <class TypeParam>                                                   \
+  struct FIXTURE##NAME : public FIXTURE<TypeParam> {                           \
+    using BaseT = FIXTURE<TypeParam>;                                          \
+    void Run() NO_THREAD_SAFETY_ANALYSIS;                                      \
+  };                                                                           \
+  SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                                    \
+  template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run()
+
 SCUDO_TYPED_TEST(ScudoCombinedTest, IsOwned) {
   auto *Allocator = this->Allocator.get();
   static scudo::u8 StaticBuffer[scudo::Chunk::getHeaderSize() + 1];
@@ -547,7 +561,8 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) {
   EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos);
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS {
+SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, CacheDrain) {
+  using AllocatorT = typename BaseT::AllocatorT;
   auto *Allocator = this->Allocator.get();
 
   std::vector<void *> V;
@@ -559,17 +574,15 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS {
   for (auto P : V)
     Allocator->deallocate(P, Origin);
 
-  bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   EXPECT_TRUE(!TSD->getCache().isEmpty());
   TSD->getCache().drain();
   EXPECT_TRUE(TSD->getCache().isEmpty());
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS {
+SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, ForceCacheDrain) {
+  using AllocatorT = typename BaseT::AllocatorT;
   auto *Allocator = this->Allocator.get();
 
   std::vector<void *> V;
@@ -584,14 +597,11 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS {
   // `ForceAll` will also drain the caches.
   Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll);
 
-  bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   EXPECT_TRUE(TSD->getCache().isEmpty());
   EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U);
   EXPECT_TRUE(Allocator->getQuarantine()->isEmpty());
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
 SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) {
@@ -892,8 +902,8 @@ TEST(ScudoCombinedTest, BasicTrustyConfig) {
   }
 
   bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   TSD->getCache().drain();
 
   Allocator->releaseToOS(scudo::ReleaseToOS::Force);
diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
index fad8fcf90077116..432129c37d424f7 100644
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -17,6 +17,7 @@
 #include <mutex>
 #include <set>
 #include <thread>
+#include <type_traits>
 
 // We mock out an allocator with a TSD registry, mostly using empty stubs. The
 // cache contains a single volatile uptr, to be able to test that several
@@ -86,7 +87,8 @@ TEST(ScudoTSDTest, TSDRegistryInit) {
   EXPECT_TRUE(Allocator->isInitialized());
 }
 
-template <class AllocatorT> static void testRegistry() {
+template <class AllocatorT>
+static void testRegistry() NO_THREAD_SAFETY_ANALYSIS {
   auto Deleter = [](AllocatorT *A) {
     A->unmapTestOnly();
     delete A;
@@ -99,22 +101,17 @@ template <class AllocatorT> static void testRegistry() {
   Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/true);
   EXPECT_TRUE(Allocator->isInitialized());
 
-  bool UnlockRequired;
-  auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
-  EXPECT_EQ(TSD->getCache().Canary, 0U);
-  if (UnlockRequired)
-    TSD->unlock();
+  {
+    typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
+    EXPECT_EQ(TSD->getCache().Canary, 0U);
+  }
 
   Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false);
-  TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
-  EXPECT_EQ(TSD->getCache().Canary, 0U);
-  memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache()));
-  if (UnlockRequired)
-    TSD->unlock();
+  {
+    typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
+    EXPECT_EQ(TSD->getCache().Canary, 0U);
+    memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache()));
+  }
 }
 
 TEST(ScudoTSDTest, TSDRegistryBasic) {
@@ -129,7 +126,12 @@ static std::mutex Mutex;
 static std::condition_variable Cv;
 static bool Ready;
 
-template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
+// Accessing `TSD->getCache()` requires `TSD::Mutex`, which is hard to verify
+// with static thread-safety analysis. Instead, we verify the thread safety
+// through the runtime checks in ScopedTSD and mark this function with
+// NO_THREAD_SAFETY_ANALYSIS.
+template <typename AllocatorT>
+static void stressCache(AllocatorT *Allocator) NO_THREAD_SAFETY_ANALYSIS {
   auto Registry = Allocator->getTSDRegistry();
   {
     std::unique_lock<std::mutex> Lock(Mutex);
@@ -137,14 +139,13 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
       Cv.wait(Lock);
   }
   Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
-  bool UnlockRequired;
-  auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
   // For an exclusive TSD, the cache should be empty. We cannot guarantee the
   // same for a shared TSD.
-  if (!UnlockRequired)
+  if (std::is_same<typename AllocatorT::TSDRegistryT,
+                   scudo::TSDRegistryExT<AllocatorT>>()) {
     EXPECT_EQ(TSD->getCache().Canary, 0U);
+  }
   // Transform the thread id to a uptr to use it as canary.
   const scudo::uptr Canary = static_cast<scudo::uptr>(
       std::hash<std::thread::id>{}(std::this_thread::get_id()));
@@ -152,8 +153,6 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
   // Loop a few times to make sure that a concurrent thread isn't modifying it.
   for (scudo::uptr I = 0; I < 4096U; I++)
     EXPECT_EQ(TSD->getCache().Canary, Canary);
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
 template <class AllocatorT> static void testRegistryThreaded() {
@@ -195,14 +194,10 @@ static void stressSharedRegistry(MockAllocator<SharedCaches> *Allocator) {
       Cv.wait(Lock);
   }
   Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
-  bool UnlockRequired;
   for (scudo::uptr I = 0; I < 4096U; I++) {
-    auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-    TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-    EXPECT_NE(TSD, nullptr);
-    Set.insert(reinterpret_cast<void *>(TSD));
-    if (UnlockRequired)
-      TSD->unlock();
+    typename MockAllocator<SharedCaches>::TSDRegistryT::ScopedTSD TSD(
+        *Registry);
+    Set.insert(reinterpret_cast<void *>(&*TSD));
   }
   {
     std::unique_lock<std::mutex> Lock(Mutex);
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
index 238367420238dae..9a32d03b0613987 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -27,6 +27,31 @@ struct ThreadState {
 template <class Allocator> void teardownThread(void *Ptr);
 
 template <class Allocator> struct TSDRegistryExT {
+  using ThisT = TSDRegistryExT<Allocator>;
+
+  struct ScopedTSD {
+    ScopedTSD(ThisT &TSDRegistry) {
+      CurrentTSD = TSDRegistry.getTSDAndLock(&IsFallback);
+      DCHECK_NE(CurrentTSD, nullptr);
+    }
+
+    ~ScopedTSD() {
+      if (UNLIKELY(IsFallback))
+        CurrentTSD->unlock();
+    }
+
+    TSD<Allocator> &operator*() { return *CurrentTSD; }
+
+    TSD<Allocator> *operator->() {
+      CurrentTSD->assertLocked(/*BypassCheck=*/!IsFallback);
+      return CurrentTSD;
+    }
+
+  private:
+    TSD<Allocator> *CurrentTSD;
+    bool IsFallback;
+  };
+
   void init(Allocator *Instance) REQUIRES(Mutex) {
     DCHECK(!Initialized);
     Instance->init();
@@ -74,23 +99,6 @@ template <class Allocator> struct TSDRegistryExT {
     initThread(Instance, MinimalInit);
   }
 
-  // TODO(chiahungduan): Consider removing the argument `UnlockRequired` by
-  // embedding the logic into TSD or always locking the TSD. It will enable us
-  // to properly mark thread annotation here and adding proper runtime
-  // assertions in the member functions of TSD. For example, assert the lock is
-  // acquired before calling TSD::commitBack().
-  ALWAYS_INLINE TSD<Allocator> *
-  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
-    if (LIKELY(State.InitState == ThreadState::Initialized &&
-               !atomic_load(&Disabled, memory_order_acquire))) {
-      *UnlockRequired = false;
-      return &ThreadTSD;
-    }
-    FallbackTSD.lock();
-    *UnlockRequired = true;
-    return &FallbackTSD;
-  }
-
   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
   void disable() NO_THREAD_SAFETY_ANALYSIS {
@@ -123,6 +131,18 @@ template <class Allocator> struct TSDRegistryExT {
   }
 
 private:
+  ALWAYS_INLINE TSD<Allocator> *
+  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
+    if (LIKELY(State.InitState == ThreadState::Initialized &&
+               !atomic_load(&Disabled, memory_order_acquire))) {
+      *UnlockRequired = false;
+      return &ThreadTSD;
+    }
+    FallbackTSD.lock();
+    *UnlockRequired = true;
+    return &FallbackTSD;
+  }
+
   // Using minimal initialization allows for global initialization while keeping
   // the thread specific structure untouched. The fallback structure will be
   // used instead.
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index 1bca578ee14be4c..a2e9bb3d4a6632b 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -26,6 +26,27 @@ namespace scudo {
 
 template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
 struct TSDRegistrySharedT {
+  using ThisT = TSDRegistrySharedT<Allocator, TSDsArraySize, DefaultTSDCount>;
+
+  struct ScopedTSD {
+    ScopedTSD(ThisT &TSDRegistry) {
+      CurrentTSD = TSDRegistry.getTSDAndLock();
+      DCHECK_NE(CurrentTSD, nullptr);
+    }
+
+    ~ScopedTSD() { CurrentTSD->unlock(); }
+
+    TSD<Allocator> &operator*() { return *CurrentTSD; }
+
+    TSD<Allocator> *operator->() {
+      CurrentTSD->assertLocked(/*BypassCheck=*/false);
+      return CurrentTSD;
+    }
+
+  private:
+    TSD<Allocator> *CurrentTSD;
+  };
+
   void init(Allocator *Instance) REQUIRES(Mutex) {
     DCHECK(!Initialized);
     Instance->init();
@@ -70,26 +91,6 @@ struct TSDRegistrySharedT {
     initThread(Instance);
   }
 
-  // TSDs is an array of locks and which is not supported for marking
-  // thread-safety capability.
-  ALWAYS_INLINE TSD<Allocator> *
-  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
-    TSD<Allocator> *TSD = getCurrentTSD();
-    DCHECK(TSD);
-    *UnlockRequired = true;
-    // Try to lock the currently associated context.
-    if (TSD->tryLock())
-      return TSD;
-    // If that fails, go down the slow path.
-    if (TSDsArraySize == 1U) {
-      // Only 1 TSD, not need to go any further.
-      // The compiler will optimize this one way or the other.
-      TSD->lock();
-      return TSD;
-    }
-    return getTSDAndLockSlow(TSD);
-  }
-
   void disable() NO_THREAD_SAFETY_ANALYSIS {
     Mutex.lock();
     for (u32 I = 0; I < TSDsArraySize; I++)
@@ -132,6 +133,23 @@ struct TSDRegistrySharedT {
   }
 
 private:
+  ALWAYS_INLINE TSD<Allocator> *
+  getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
+    TSD<Allocator> *TSD = getCurrentTSD();
+    DCHECK(TSD);
+    // Try to lock the currently associated context.
+    if (TSD->tryLock())
+      return TSD;
+    // If that fails, go down the slow path.
+    if (TSDsArraySize == 1U) {
+      // Only 1 TSD, no need to go any further.
+      // The compiler will optimize this one way or the other.
+      TSD->lock();
+      return TSD;
+    }
+    return getTSDAndLockSlow(TSD);
+  }
+
   ALWAYS_INLINE uptr *getTlsPtr() const {
 #if SCUDO_HAS_PLATFORM_TLS_SLOT
     return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot());

From cdc29a40c6034af1ad4c7b5126c2b572fcd39d63 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 14 Nov 2023 19:45:18 +0000
Subject: [PATCH 2/3] fixup! [scudo] Add ScopedTSD to avoid releasing TSD
 manually

---
 compiler-rt/lib/scudo/standalone/tsd_shared.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index a2e9bb3d4a6632b..0ac2be3d4c294df 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -133,8 +133,7 @@ struct TSDRegistrySharedT {
   }
 
 private:
-  ALWAYS_INLINE TSD<Allocator> *
-  getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
+  ALWAYS_INLINE TSD<Allocator> *getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
     TSD<Allocator> *TSD = getCurrentTSD();
     DCHECK(TSD);
     // Try to lock the currently associated context.

From df1a4b700efc00e4e298c57cc67604b0594f6f84 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Wed, 29 Nov 2023 23:53:32 +0000
Subject: [PATCH 3/3] [scudo] Add hooks to mark the range of realloc

`realloc` may involve both an allocation and a deallocation. Given that
reporting the two events is not atomic, the hook user may be led to the
false conclusion that a double-use pattern has happened. In general, this
can be resolved on the hook side. To ease that task, we add two new hooks
to mark the range of `realloc` and reorder the reporting.
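
The resulting event ordering for `realloc(ptr, size)` is sketched below
(derived from the wrappers_c.inc change in this patch; `old_size` stands
for the current allocation size of `ptr`):

  __scudo_realloc_begin_hook(ptr);
  // ptr == NULL: __scudo_allocate_hook(new_ptr, size)
  // size == 0:   __scudo_deallocate_hook(ptr)
  // otherwise:   __scudo_deallocate_hook(ptr) is reported first, then on
  //              success __scudo_allocate_hook(new_ptr, size), or on
  //              failure __scudo_allocate_hook(ptr, old_size) to re-report
  //              the still-live old pointer.
  __scudo_realloc_end_hook(ptr);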
---
 compiler-rt/lib/scudo/standalone/combined.h   | 11 +++-
 .../standalone/include/scudo/interface.h      |  7 +-
 .../standalone/tests/wrappers_c_test.cpp      | 42 ++++++++++--
 .../lib/scudo/standalone/wrappers_c.inc       | 65 ++++++++++++++-----
 4 files changed, 100 insertions(+), 25 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 02cd5e80e873665..9bfe1ea6c96f39e 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -823,10 +823,15 @@ class Allocator {
   // for it, which then forces realloc to copy the usable size of a chunk as
   // opposed to its actual size.
   uptr getUsableSize(const void *Ptr) {
-    initThreadMaybe();
     if (UNLIKELY(!Ptr))
       return 0;
 
+    return getAllocSize(Ptr);
+  }
+
+  uptr getAllocSize(const void *Ptr) {
+    initThreadMaybe();
+
 #ifdef GWP_ASAN_HOOKS
     if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr)))
       return GuardedAlloc.getSize(Ptr);
@@ -835,9 +840,11 @@ class Allocator {
     Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr));
     Chunk::UnpackedHeader Header;
     Chunk::loadHeader(Cookie, Ptr, &Header);
-    // Getting the usable size of a chunk only makes sense if it's allocated.
+
+    // Getting the alloc size of a chunk only makes sense if it's allocated.
     if (UNLIKELY(Header.State != Chunk::State::Allocated))
       reportInvalidChunkState(AllocatorAction::Sizing, const_cast<void *>(Ptr));
+
     return getSize(Ptr, &Header);
   }
 
diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
index 6c0c521f8d82f85..e4534fc9ec2f721 100644
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -17,10 +17,15 @@ extern "C" {
 __attribute__((weak)) const char *__scudo_default_options(void);
 
 // Post-allocation & pre-deallocation hooks.
-// They must be thread-safe and not use heap related functions.
 __attribute__((weak)) void __scudo_allocate_hook(void *ptr, size_t size);
 __attribute__((weak)) void __scudo_deallocate_hook(void *ptr);
 
+// These hooks are used to mark the scope of a realloc() call. Note that the
+// allocation/deallocation within realloc() are still reported through the
+// hooks above; these only group the two operations of a single realloc().
+__attribute__((weak)) void __scudo_realloc_begin_hook(void *old_ptr);
+__attribute__((weak)) void __scudo_realloc_end_hook(void *old_ptr);
+
 void __scudo_print_stats(void);
 
 typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg);
diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
index 623550535b6c8e2..7b47c872ae88d71 100644
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
@@ -61,8 +61,13 @@ struct AllocContext {
 struct DeallocContext {
   void *Ptr;
 };
+struct ReallocContext {
+  void *Begin;
+  void *End;
+};
 static AllocContext AC;
 static DeallocContext DC;
+static ReallocContext RC;
 
 #if (SCUDO_ENABLE_HOOKS_TESTS == 1)
 __attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr,
@@ -73,6 +78,14 @@ __attribute__((visibility("default"))) void __scudo_allocate_hook(void *Ptr,
 __attribute__((visibility("default"))) void __scudo_deallocate_hook(void *Ptr) {
   DC.Ptr = Ptr;
 }
+__attribute__((visibility("default"))) void
+__scudo_realloc_begin_hook(void *Ptr) {
+  RC.Begin = Ptr;
+}
+__attribute__((visibility("default"))) void
+__scudo_realloc_end_hook(void *Ptr) {
+  RC.End = Ptr;
+}
 #endif // (SCUDO_ENABLE_HOOKS_TESTS == 1)
 }
 
@@ -83,9 +96,12 @@ class ScudoWrappersCTest : public Test {
       printf("Hooks are enabled but hooks tests are disabled.\n");
   }
 
-  void invalidateAllocHookPtrAs(UNUSED void *Ptr) {
-    if (SCUDO_ENABLE_HOOKS_TESTS)
+  void invalidateHookPtrsAs(UNUSED void *Ptr) {
+    if (SCUDO_ENABLE_HOOKS_TESTS) {
       AC.Ptr = Ptr;
+      DC.Ptr = Ptr;
+      RC.Begin = RC.End = Ptr;
+    }
   }
   void verifyAllocHookPtr(UNUSED void *Ptr) {
     if (SCUDO_ENABLE_HOOKS_TESTS)
@@ -99,6 +115,12 @@ class ScudoWrappersCTest : public Test {
     if (SCUDO_ENABLE_HOOKS_TESTS)
       EXPECT_EQ(Ptr, DC.Ptr);
   }
+  void verifyReallocHooksScope(UNUSED void *Ptr) {
+    if (SCUDO_ENABLE_HOOKS_TESTS) {
+      EXPECT_EQ(Ptr, RC.Begin);
+      EXPECT_EQ(Ptr, RC.End);
+    }
+  }
 };
 using ScudoWrappersCDeathTest = ScudoWrappersCTest;
 
@@ -258,26 +280,30 @@ TEST_F(ScudoWrappersCTest, AlignedAlloc) {
 }
 
 TEST_F(ScudoWrappersCDeathTest, Realloc) {
+  invalidateHookPtrsAs(reinterpret_cast<void *>(0xdeadbeef));
   // realloc(nullptr, N) is malloc(N)
   void *P = realloc(nullptr, Size);
   EXPECT_NE(P, nullptr);
   verifyAllocHookPtr(P);
   verifyAllocHookSize(Size);
+  verifyReallocHooksScope(nullptr);
   free(P);
   verifyDeallocHookPtr(P);
 
+  invalidateHookPtrsAs(reinterpret_cast<void *>(0xdeadbeef));
   P = malloc(Size);
   EXPECT_NE(P, nullptr);
   // realloc(P, 0U) is free(P) and returns nullptr
   EXPECT_EQ(realloc(P, 0U), nullptr);
   verifyDeallocHookPtr(P);
+  verifyReallocHooksScope(P);
 
   P = malloc(Size);
   EXPECT_NE(P, nullptr);
   EXPECT_LE(Size, malloc_usable_size(P));
   memset(P, 0x42, Size);
 
-  invalidateAllocHookPtrAs(reinterpret_cast<void *>(0xdeadbeef));
+  invalidateHookPtrsAs(reinterpret_cast<void *>(0xdeadbeef));
   void *OldP = P;
   P = realloc(P, Size * 2U);
   EXPECT_NE(P, nullptr);
@@ -285,14 +311,16 @@ TEST_F(ScudoWrappersCDeathTest, Realloc) {
   for (size_t I = 0; I < Size; I++)
     EXPECT_EQ(0x42, (reinterpret_cast<uint8_t *>(P))[I]);
   if (OldP == P) {
-    verifyAllocHookPtr(reinterpret_cast<void *>(0xdeadbeef));
+    verifyDeallocHookPtr(OldP);
+    verifyAllocHookPtr(OldP);
   } else {
     verifyAllocHookPtr(P);
     verifyAllocHookSize(Size * 2U);
     verifyDeallocHookPtr(OldP);
   }
+  verifyReallocHooksScope(OldP);
 
-  invalidateAllocHookPtrAs(reinterpret_cast<void *>(0xdeadbeef));
+  invalidateHookPtrsAs(reinterpret_cast<void *>(0xdeadbeef));
   OldP = P;
   P = realloc(P, Size / 2U);
   EXPECT_NE(P, nullptr);
@@ -300,11 +328,13 @@ TEST_F(ScudoWrappersCDeathTest, Realloc) {
   for (size_t I = 0; I < Size / 2U; I++)
     EXPECT_EQ(0x42, (reinterpret_cast<uint8_t *>(P))[I]);
   if (OldP == P) {
-    verifyAllocHookPtr(reinterpret_cast<void *>(0xdeadbeef));
+    verifyDeallocHookPtr(OldP);
+    verifyAllocHookPtr(OldP);
   } else {
     verifyAllocHookPtr(P);
     verifyAllocHookSize(Size / 2U);
   }
+  verifyReallocHooksScope(OldP);
   free(P);
 
   EXPECT_DEATH(P = realloc(P, Size), "");
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index e213da73393b5da..fde459bf5bca631 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -27,6 +27,51 @@ static void reportDeallocation(void *ptr) {
     if (__scudo_deallocate_hook)
       __scudo_deallocate_hook(ptr);
 }
+static void reportReallocBegin(void *old_ptr) {
+  if (SCUDO_ENABLE_HOOKS)
+    if (__scudo_realloc_begin_hook)
+      __scudo_realloc_begin_hook(old_ptr);
+}
+static void reportReallocEnd(void *old_ptr) {
+  if (SCUDO_ENABLE_HOOKS)
+    if (__scudo_realloc_end_hook)
+      __scudo_realloc_end_hook(old_ptr);
+}
+
+static void *reallocImpl(void *ptr, size_t size) {
+  if (!ptr) {
+    void *Ptr = SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc,
+                                         SCUDO_MALLOC_ALIGNMENT);
+    reportAllocation(Ptr, size);
+    return Ptr;
+  }
+  if (size == 0) {
+    reportDeallocation(ptr);
+    SCUDO_ALLOCATOR.deallocate(ptr, scudo::Chunk::Origin::Malloc);
+    return nullptr;
+  }
+
+  // Given that the reporting of deallocation and allocation is not atomic, we
+  // always report the old pointer as deallocated first, so that the hook user
+  // doesn't need to worry about a false double-use case from the view of the
+  // hooks. For example, before all the events of this `realloc` have been
+  // reported, another thread may already have been handed the pointer just
+  // released by the `realloc` and have reported it to the hooks; without the
+  // ordering guarantee, that could be misinterpreted as a double-use.
+  reportDeallocation(ptr);
+  void *NewPtr = SCUDO_ALLOCATOR.reallocate(ptr, size, SCUDO_MALLOC_ALIGNMENT);
+  if (NewPtr != nullptr) {
+    // Note that even if NewPtr == ptr, the size has changed. We still need to
+    // report the new size.
+    reportAllocation(NewPtr, size);
+  } else {
+    // If `realloc` fails, the old pointer is not released. Since we reported
+    // its deallocation above, report it as allocated again.
+    reportAllocation(ptr, SCUDO_ALLOCATOR.getAllocSize(ptr));
+  }
+
+  return NewPtr;
+}
 
 extern "C" {
 
@@ -163,23 +208,11 @@ INTERFACE WEAK void *SCUDO_PREFIX(pvalloc)(size_t size) {
 }
 
 INTERFACE WEAK void *SCUDO_PREFIX(realloc)(void *ptr, size_t size) {
-  if (!ptr) {
-    void *Ptr = SCUDO_ALLOCATOR.allocate(size, scudo::Chunk::Origin::Malloc,
-                                         SCUDO_MALLOC_ALIGNMENT);
-    reportAllocation(Ptr, size);
-    return scudo::setErrnoOnNull(Ptr);
-  }
-  if (size == 0) {
-    reportDeallocation(ptr);
-    SCUDO_ALLOCATOR.deallocate(ptr, scudo::Chunk::Origin::Malloc);
-    return nullptr;
-  }
+  reportReallocBegin(ptr);
 
-  void *NewPtr = SCUDO_ALLOCATOR.reallocate(ptr, size, SCUDO_MALLOC_ALIGNMENT);
-  if (NewPtr != ptr) {
-    reportAllocation(NewPtr, size);
-    reportDeallocation(ptr);
-  }
+  void *NewPtr = reallocImpl(ptr, size);
+
+  reportReallocEnd(ptr);
 
   return scudo::setErrnoOnNull(NewPtr);
 }


