[compiler-rt] 21d5001 - scudo: Add support for diagnosing memory errors when memory tagging is enabled.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 17 17:26:40 PDT 2020


Author: Peter Collingbourne
Date: 2020-04-17T17:26:30-07:00
New Revision: 21d50019ca83765a655b3d67331dfb83cf3d260d

URL: https://github.com/llvm/llvm-project/commit/21d50019ca83765a655b3d67331dfb83cf3d260d
DIFF: https://github.com/llvm/llvm-project/commit/21d50019ca83765a655b3d67331dfb83cf3d260d.diff

LOG: scudo: Add support for diagnosing memory errors when memory tagging is enabled.

Introduce a function __scudo_get_error_info() that may be called to interpret
a crash resulting from a memory error, potentially in another process,
given information extracted from the crashing process. The crash may be
interpreted as a use-after-free, buffer overflow or buffer underflow.

Also introduce a feature to optionally record a stack trace for each
allocation and deallocation. If this feature is enabled, a stack trace for
the allocation and (if applicable) the deallocation will also be available
via __scudo_get_error_info().
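
For illustration (not part of the change itself), a minimal sketch of how a
crash-analysis tool might consume the new interface. Only
__scudo_get_error_info() and the scudo_error_info/scudo_error_report types
come from this patch; the include path, the helper name and the way the
crashing process's memory, stack depot and region info were copied out are
assumptions.

  #include <stdint.h>
  #include <stdio.h>

  #include "scudo/interface.h"

  // All pointer/size arguments refer to data already copied out of the
  // crashing process by the caller (hypothetical buffers).
  void ReportScudoCrash(uintptr_t FaultAddr, const char *StackDepot,
                        const char *RegionInfo, const char *Memory,
                        const char *MemoryTags, uintptr_t MemoryAddr,
                        size_t MemorySize) {
    scudo_error_info Info;
    __scudo_get_error_info(&Info, FaultAddr, StackDepot, RegionInfo, Memory,
                           MemoryTags, MemoryAddr, MemorySize);
    for (const auto &R : Info.reports) {
      if (R.error_type == UNKNOWN)
        break; // remaining report slots are zero-initialized
      printf("candidate cause %d: allocation at 0x%zx, size %zu, tid %u\n",
             (int)R.error_type, (size_t)R.allocation_address,
             (size_t)R.allocation_size, R.allocation_tid);
      // R.allocation_trace / R.deallocation_trace hold raw PCs when the
      // crashing process had stack trace recording enabled.
    }
  }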

Differential Revision: https://reviews.llvm.org/D77283

Added: 
    compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt
    compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
    compiler-rt/lib/scudo/standalone/stack_depot.h

Modified: 
    compiler-rt/lib/CMakeLists.txt
    compiler-rt/lib/scudo/standalone/combined.h
    compiler-rt/lib/scudo/standalone/common.h
    compiler-rt/lib/scudo/standalone/fuchsia.cpp
    compiler-rt/lib/scudo/standalone/include/scudo/interface.h
    compiler-rt/lib/scudo/standalone/linux.cpp
    compiler-rt/lib/scudo/standalone/memtag.h
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/wrappers_c.inc
    compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt
index 39082aa06d42..2020ee32d4f7 100644
--- a/compiler-rt/lib/CMakeLists.txt
+++ b/compiler-rt/lib/CMakeLists.txt
@@ -59,3 +59,9 @@ endif()
 if(COMPILER_RT_BUILD_LIBFUZZER)
   compiler_rt_build_runtime(fuzzer)
 endif()
+
+# It doesn't normally make sense to build runtimes when a sanitizer is enabled,
+# so we don't add_subdirectory the runtimes in that case. However, the opposite
+# is true for fuzzers that exercise parts of the runtime. So we add the fuzzer
+# directories explicitly here.
+add_subdirectory(scudo/standalone/fuzz)

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 1fffea4dc5c2..51e0c5d1966c 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -18,6 +18,7 @@
 #include "quarantine.h"
 #include "report.h"
 #include "secondary.h"
+#include "stack_depot.h"
 #include "string_utils.h"
 #include "tsd.h"
 
@@ -31,6 +32,14 @@
 
 extern "C" inline void EmptyCallback() {}
 
+#if SCUDO_ANDROID && __ANDROID_API__ == 10000
+// This function is not part of the NDK so it does not appear in any public
+// header files. We only declare/use it when targeting the platform (i.e. API
+// level 10000).
+extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf,
+                                                     size_t num_entries);
+#endif
+
 namespace scudo {
 
 enum class Option { ReleaseInterval };
@@ -142,6 +151,7 @@ class Allocator {
     Options.ZeroContents = getFlags()->zero_contents;
     Options.DeallocTypeMismatch = getFlags()->dealloc_type_mismatch;
     Options.DeleteSizeMismatch = getFlags()->delete_size_mismatch;
+    Options.TrackAllocationStacks = false;
     Options.QuarantineMaxChunkSize =
         static_cast<u32>(getFlags()->quarantine_max_chunk_size);
 
@@ -221,6 +231,20 @@ class Allocator {
     return Ptr;
   }
 
+  NOINLINE u32 collectStackTrace() {
+#if SCUDO_ANDROID && __ANDROID_API__ == 10000
+    // Discard collectStackTrace() frame and allocator function frame.
+    constexpr uptr DiscardFrames = 2;
+    uptr Stack[MaxTraceSize + DiscardFrames];
+    uptr Size =
+        android_unsafe_frame_pointer_chase(Stack, MaxTraceSize + DiscardFrames);
+    Size = Min<uptr>(Size, MaxTraceSize + DiscardFrames);
+    return Depot.insert(Stack + Min<uptr>(DiscardFrames, Size), Stack + Size);
+#else
+    return 0;
+#endif
+  }
+
   NOINLINE void *allocate(uptr Size, Chunk::Origin Origin,
                           uptr Alignment = MinAlignment,
                           bool ZeroContents = false) {
@@ -359,9 +383,15 @@ class Allocator {
             PrevEnd = NextPage;
           TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
           resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd);
+          if (Size) {
+            // Clear any stack metadata that may have previously been stored in
+            // the chunk data.
+            memset(TaggedPtr, 0, archMemoryTagGranuleSize());
+          }
         } else {
           TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd);
         }
+        storeAllocationStackMaybe(Ptr);
       } else if (UNLIKELY(ZeroContents)) {
         // This condition is not necessarily unlikely, but since memset is
         // costly, we might as well mark it as such.
@@ -515,10 +545,12 @@ class Allocator {
                      : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) &
             Chunk::SizeOrUnusedBytesMask;
         Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader);
-        if (UNLIKELY(ClassId && useMemoryTagging()))
+        if (UNLIKELY(ClassId && useMemoryTagging())) {
           resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize,
                             reinterpret_cast<uptr>(OldTaggedPtr) + NewSize,
                             BlockEnd);
+          storeAllocationStackMaybe(OldPtr);
+        }
         return OldTaggedPtr;
       }
     }
@@ -689,6 +721,135 @@ class Allocator {
 
   void disableMemoryTagging() { Primary.disableMemoryTagging(); }
 
+  void setTrackAllocationStacks(bool Track) {
+    Options.TrackAllocationStacks = Track;
+  }
+
+  const char *getStackDepotAddress() const {
+    return reinterpret_cast<const char *>(&Depot);
+  }
+
+  const char *getRegionInfoArrayAddress() const {
+    return Primary.getRegionInfoArrayAddress();
+  }
+
+  static uptr getRegionInfoArraySize() {
+    return PrimaryT::getRegionInfoArraySize();
+  }
+
+  static void getErrorInfo(struct scudo_error_info *ErrorInfo,
+                           uintptr_t FaultAddr, const char *DepotPtr,
+                           const char *RegionInfoPtr, const char *Memory,
+                           const char *MemoryTags, uintptr_t MemoryAddr,
+                           size_t MemorySize) {
+    *ErrorInfo = {};
+    if (!PrimaryT::SupportsMemoryTagging ||
+        MemoryAddr + MemorySize < MemoryAddr)
+      return;
+
+    uptr UntaggedFaultAddr = untagPointer(FaultAddr);
+    u8 FaultAddrTag = extractTag(FaultAddr);
+    BlockInfo Info =
+        PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr);
+
+    auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool {
+      if (Addr < MemoryAddr ||
+          Addr + archMemoryTagGranuleSize() < Addr ||
+          Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize)
+        return false;
+      *Data = &Memory[Addr - MemoryAddr];
+      *Tag = static_cast<u8>(
+          MemoryTags[(Addr - MemoryAddr) / archMemoryTagGranuleSize()]);
+      return true;
+    };
+
+    auto ReadBlock = [&](uptr Addr, uptr *ChunkAddr,
+                         Chunk::UnpackedHeader *Header, const u32 **Data,
+                         u8 *Tag) {
+      const char *BlockBegin;
+      u8 BlockBeginTag;
+      if (!GetGranule(Addr, &BlockBegin, &BlockBeginTag))
+        return false;
+      uptr ChunkOffset = getChunkOffsetFromBlock(BlockBegin);
+      *ChunkAddr = Addr + ChunkOffset;
+
+      const char *ChunkBegin;
+      if (!GetGranule(*ChunkAddr, &ChunkBegin, Tag))
+        return false;
+      *Header = *reinterpret_cast<const Chunk::UnpackedHeader *>(
+          ChunkBegin - Chunk::getHeaderSize());
+      *Data = reinterpret_cast<const u32 *>(ChunkBegin);
+      return true;
+    };
+
+    auto *Depot = reinterpret_cast<const StackDepot *>(DepotPtr);
+
+    auto MaybeCollectTrace = [&](uintptr_t(&Trace)[MaxTraceSize], u32 Hash) {
+      uptr RingPos, Size;
+      if (!Depot->find(Hash, &RingPos, &Size))
+        return;
+      for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
+        Trace[I] = (*Depot)[RingPos + I];
+    };
+
+    size_t NextErrorReport = 0;
+
+    // First, check for UAF.
+    {
+      uptr ChunkAddr;
+      Chunk::UnpackedHeader Header;
+      const u32 *Data;
+      uint8_t Tag;
+      if (ReadBlock(Info.BlockBegin, &ChunkAddr, &Header, &Data, &Tag) &&
+          Header.State != Chunk::State::Allocated &&
+          Data[MemTagPrevTagIndex] == FaultAddrTag) {
+        auto *R = &ErrorInfo->reports[NextErrorReport++];
+        R->error_type = USE_AFTER_FREE;
+        R->allocation_address = ChunkAddr;
+        R->allocation_size = Header.SizeOrUnusedBytes;
+        MaybeCollectTrace(R->allocation_trace,
+                          Data[MemTagAllocationTraceIndex]);
+        R->allocation_tid = Data[MemTagAllocationTidIndex];
+        MaybeCollectTrace(R->deallocation_trace,
+                          Data[MemTagDeallocationTraceIndex]);
+        R->deallocation_tid = Data[MemTagDeallocationTidIndex];
+      }
+    }
+
+    auto CheckOOB = [&](uptr BlockAddr) {
+      if (BlockAddr < Info.RegionBegin || BlockAddr >= Info.RegionEnd)
+        return false;
+
+      uptr ChunkAddr;
+      Chunk::UnpackedHeader Header;
+      const u32 *Data;
+      uint8_t Tag;
+      if (!ReadBlock(BlockAddr, &ChunkAddr, &Header, &Data, &Tag) ||
+          Header.State != Chunk::State::Allocated || Tag != FaultAddrTag)
+        return false;
+
+      auto *R = &ErrorInfo->reports[NextErrorReport++];
+      R->error_type =
+          UntaggedFaultAddr < ChunkAddr ? BUFFER_UNDERFLOW : BUFFER_OVERFLOW;
+      R->allocation_address = ChunkAddr;
+      R->allocation_size = Header.SizeOrUnusedBytes;
+      MaybeCollectTrace(R->allocation_trace, Data[MemTagAllocationTraceIndex]);
+      R->allocation_tid = Data[MemTagAllocationTidIndex];
+      return NextErrorReport ==
+             sizeof(ErrorInfo->reports) / sizeof(ErrorInfo->reports[0]);
+    };
+
+    if (CheckOOB(Info.BlockBegin))
+      return;
+
+    // Check for OOB in the 30 surrounding blocks. Beyond that we are likely to
+    // hit false positives.
+    for (int I = 1; I != 16; ++I)
+      if (CheckOOB(Info.BlockBegin + I * Info.BlockSize) ||
+          CheckOOB(Info.BlockBegin - I * Info.BlockSize))
+        return;
+  }
+
 private:
   using SecondaryT = typename Params::Secondary;
   typedef typename PrimaryT::SizeClassMap SizeClassMap;
@@ -708,6 +869,26 @@ class Allocator {
 
   static const u32 BlockMarker = 0x44554353U;
 
+  // These are indexes into an "array" of 32-bit values that store information
+  // inline with a chunk that is relevant to diagnosing memory tag faults, where
+  // 0 corresponds to the address of the user memory. This means that negative
+  // indexes may be used to store information about allocations, while positive
+  // indexes may only be used to store information about deallocations, because
+  // the user memory is in use until it has been deallocated. The smallest index
+  // that may be used is -2, which corresponds to 8 bytes before the user
+  // memory, because the chunk header size is 8 bytes and in allocators that
+  // support memory tagging the minimum alignment is at least the tag granule
+  // size (16 on aarch64), and the largest index that may be used is 3 because
+  // we are only guaranteed to have at least a granule's worth of space in the
+  // user memory.
+  static const sptr MemTagAllocationTraceIndex = -2;
+  static const sptr MemTagAllocationTidIndex = -1;
+  static const sptr MemTagDeallocationTraceIndex = 0;
+  static const sptr MemTagDeallocationTidIndex = 1;
+  static const sptr MemTagPrevTagIndex = 2;
+
+  static const uptr MaxTraceSize = 64;
+
   GlobalStats Stats;
   TSDRegistryT TSDRegistry;
   PrimaryT Primary;
@@ -721,6 +902,7 @@ class Allocator {
     u8 ZeroContents : 1;        // zero_contents
     u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch
     u8 DeleteSizeMismatch : 1;  // delete_size_mismatch
+    u8 TrackAllocationStacks : 1;
     u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size
   } Options;
 
@@ -728,6 +910,8 @@ class Allocator {
   gwp_asan::GuardedPoolAllocator GuardedAlloc;
 #endif // GWP_ASAN_HOOKS
 
+  StackDepot Depot;
+
   // The following might get optimized out by the compiler.
   NOINLINE void performSanityChecks() {
     // Verify that the header offset field can hold the maximum offset. In the
@@ -787,8 +971,10 @@ class Allocator {
                                    uptr Size) {
     Chunk::UnpackedHeader NewHeader = *Header;
     if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) {
+      u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr)));
       uptr TaggedBegin, TaggedEnd;
       setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd);
+      storeDeallocationStackMaybe(Ptr, PrevTag);
     }
     // If the quarantine is disabled, the actual size of a chunk is 0 or larger
     // than the maximum allowed, we return a chunk directly to the backend.
@@ -824,13 +1010,39 @@ class Allocator {
 
   bool getChunkFromBlock(uptr Block, uptr *Chunk,
                          Chunk::UnpackedHeader *Header) {
-    u32 Offset = 0;
-    if (reinterpret_cast<u32 *>(Block)[0] == BlockMarker)
-      Offset = reinterpret_cast<u32 *>(Block)[1];
-    *Chunk = Block + Offset + Chunk::getHeaderSize();
+    *Chunk =
+        Block + getChunkOffsetFromBlock(reinterpret_cast<const char *>(Block));
     return Chunk::isValid(Cookie, reinterpret_cast<void *>(*Chunk), Header);
   }
 
+  static uptr getChunkOffsetFromBlock(const char *Block) {
+    u32 Offset = 0;
+    if (reinterpret_cast<const u32 *>(Block)[0] == BlockMarker)
+      Offset = reinterpret_cast<const u32 *>(Block)[1];
+    return Offset + Chunk::getHeaderSize();
+  }
+
+  void storeAllocationStackMaybe(void *Ptr) {
+    if (!UNLIKELY(Options.TrackAllocationStacks))
+      return;
+    auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
+    Ptr32[MemTagAllocationTraceIndex] = collectStackTrace();
+    Ptr32[MemTagAllocationTidIndex] = getThreadID();
+  }
+
+  void storeDeallocationStackMaybe(void *Ptr, uint8_t PrevTag) {
+    if (!UNLIKELY(Options.TrackAllocationStacks))
+      return;
+
+    // Disable tag checks here so that we don't need to worry about zero sized
+    // allocations.
+    ScopedDisableMemoryTagChecks x;
+    auto *Ptr32 = reinterpret_cast<u32 *>(Ptr);
+    Ptr32[MemTagDeallocationTraceIndex] = collectStackTrace();
+    Ptr32[MemTagDeallocationTidIndex] = getThreadID();
+    Ptr32[MemTagPrevTagIndex] = PrevTag;
+  }
+
   uptr getStats(ScopedString *Str) {
     Primary.getStats(Str);
     Secondary.getStats(Str);
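
As a reading aid (not part of the patch), the inline metadata layout that the
MemTag*Index constants above describe, assuming the 16-byte tag granule and a
header area rounded up to the 16-byte minimum alignment; the struct and helper
below are hypothetical names.

  #include <cstdint>

  // Given the untagged user pointer of a chunk, the 32-bit slots written by
  // storeAllocationStackMaybe()/storeDeallocationStackMaybe() are:
  struct InlineMemTagMetadata {
    uint32_t *AllocTrace;   // Ptr32[-2], the 8 bytes just before user memory
    uint32_t *AllocTid;     // Ptr32[-1]
    uint32_t *DeallocTrace; // Ptr32[0], reuses the freed user memory
    uint32_t *DeallocTid;   // Ptr32[1]
    uint32_t *PrevTag;      // Ptr32[2], tag the chunk had before free retagged it
  };

  inline InlineMemTagMetadata inlineMetadataFor(void *UserPtr) {
    auto *Ptr32 = reinterpret_cast<uint32_t *>(UserPtr);
    return {Ptr32 - 2, Ptr32 - 1, Ptr32, Ptr32 + 1, Ptr32 + 2};
  }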

diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
index e026e34c0045..350d8d9fcd91 100644
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -133,6 +133,8 @@ const char *getEnv(const char *Name);
 
 u64 getMonotonicTime();
 
+u32 getThreadID();
+
 // Our randomness gathering function is limited to 256 bytes to ensure we get
 // as many bytes as requested, and avoid interruptions (on Linux).
 constexpr uptr MaxRandomLength = 256U;
@@ -173,6 +175,13 @@ void NORETURN dieOnMapUnmapError(bool OutOfMemory = false);
 
 void setAbortMessage(const char *Message);
 
+struct BlockInfo {
+  uptr BlockBegin;
+  uptr BlockSize;
+  uptr RegionBegin;
+  uptr RegionEnd;
+};
+
 } // namespace scudo
 
 #endif // SCUDO_COMMON_H_

diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp
index b3d72de158cf..d4ea33277941 100644
--- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp
+++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp
@@ -170,6 +170,8 @@ u64 getMonotonicTime() { return _zx_clock_get_monotonic(); }
 
 u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); }
 
+u32 getThreadID() { return 0; }
+
 bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) {
   static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, "");
   if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength))

diff --git a/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt
new file mode 100644
index 000000000000..d29c2f2fe749
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt
@@ -0,0 +1,12 @@
+if (LLVM_USE_SANITIZE_COVERAGE)
+  add_executable(get_error_info_fuzzer
+      get_error_info_fuzzer.cpp)
+  set_target_properties(
+      get_error_info_fuzzer PROPERTIES FOLDER "Fuzzers")
+  target_compile_options(
+      get_error_info_fuzzer PRIVATE -fsanitize=fuzzer)
+  set_target_properties(
+      get_error_info_fuzzer PROPERTIES LINK_FLAGS -fsanitize=fuzzer)
+  target_include_directories(
+      get_error_info_fuzzer PRIVATE .. ../include)
+endif()

diff --git a/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
new file mode 100644
index 000000000000..d29f515215e6
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp
@@ -0,0 +1,48 @@
+//===-- get_error_info_fuzzer.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SCUDO_FUZZ
+#include "allocator_config.h"
+#include "combined.h"
+
+#include <fuzzer/FuzzedDataProvider.h>
+
+#include <string>
+#include <vector>
+
+extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) {
+  using AllocatorT = scudo::Allocator<scudo::AndroidConfig>;
+  FuzzedDataProvider FDP(Data, Size);
+
+  uintptr_t FaultAddr = FDP.ConsumeIntegral<uintptr_t>();
+  uintptr_t MemoryAddr = FDP.ConsumeIntegral<uintptr_t>();
+
+  std::string MemoryAndTags = FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
+  const char *Memory = MemoryAndTags.c_str();
+  // Assume 16-byte alignment.
+  size_t MemorySize = (MemoryAndTags.length() / 17) * 16;
+  const char *MemoryTags = Memory + MemorySize;
+
+  std::string StackDepotBytes = FDP.ConsumeRandomLengthString(FDP.remaining_bytes());
+  std::vector<char> StackDepot(sizeof(scudo::StackDepot), 0);
+  for (size_t i = 0; i < StackDepotBytes.length() && i < StackDepot.size(); ++i) {
+    StackDepot[i] = StackDepotBytes[i];
+  }
+
+  std::string RegionInfoBytes = FDP.ConsumeRemainingBytesAsString();
+  std::vector<char> RegionInfo(AllocatorT::getRegionInfoArraySize(), 0);
+  for (size_t i = 0; i < RegionInfoBytes.length() && i < RegionInfo.size(); ++i) {
+    RegionInfo[i] = RegionInfoBytes[i];
+  }
+
+  scudo_error_info ErrorInfo;
+  AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(),
+                           RegionInfo.data(), Memory, MemoryTags, MemoryAddr,
+                           MemorySize);
+  return 0;
+}

diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
index e527d0a5d304..d30fb6514a14 100644
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -9,6 +9,8 @@
 #ifndef SCUDO_INTERFACE_H_
 #define SCUDO_INTERFACE_H_
 
+#include <stddef.h>
+
 extern "C" {
 
 __attribute__((weak)) const char *__scudo_default_options();
@@ -22,6 +24,87 @@ void __scudo_print_stats(void);
 
 typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg);
 
+// Determine the likely cause of a tag check fault or other memory protection
+// error on a system with memory tagging support. The results are returned via
+// the error_info data structure. Up to three possible causes are returned in
+// the reports array, in decreasing order of probability. The remaining elements
+// of reports are zero-initialized.
+//
+// This function may be called from a different process from the one that
+// crashed. In this case, various data structures must be copied from the
+// crashing process to the process that analyzes the crash.
+//
+// This interface is not guaranteed to be stable and may change at any time.
+// Furthermore, the version of scudo in the crashing process must be the same as
+// the version in the process that analyzes the crash.
+//
+// fault_addr is the fault address. On aarch64 this is available in the system
+// register FAR_ELx, or far_context.far in an upcoming release of the Linux
+// kernel. This address must include the pointer tag; note that the kernel
+// strips the tag from the fields siginfo.si_addr and sigcontext.fault_address,
+// so these addresses are not suitable to be passed as fault_addr.
+//
+// stack_depot is a pointer to the stack depot data structure, which may be
+// obtained by calling the function __scudo_get_stack_depot_addr() in the
+// crashing process. The size of the stack depot is available by calling the
+// function __scudo_get_stack_depot_size().
+//
+// region_info is a pointer to the region info data structure, which may be
+// obtained by calling the function __scudo_get_region_info_addr() in the
+// crashing process. The size of the region info is available by calling the
+// function __scudo_get_region_info_size().
+//
+// memory is a pointer to a region of memory surrounding the fault address.
+// The more memory available via this pointer, the more likely it is that the
+// function will be able to analyze a crash correctly. It is recommended to
+// provide an amount of memory equal to 16 * the primary allocator's largest
+// size class either side of the fault address.
+//
+// memory_tags is a pointer to an array of memory tags for the memory accessed
+// via memory. Each byte of this array corresponds to a region of memory of size
+// equal to the architecturally defined memory tag granule size (16 on aarch64).
+//
+// memory_addr is the start address of memory in the crashing process's address
+// space.
+//
+// memory_size is the size of the memory region referred to by the memory
+// pointer.
+void __scudo_get_error_info(struct scudo_error_info *error_info,
+                            uintptr_t fault_addr, const char *stack_depot,
+                            const char *region_info, const char *memory,
+                            const char *memory_tags, uintptr_t memory_addr,
+                            size_t memory_size);
+
+enum scudo_error_type {
+  UNKNOWN,
+  USE_AFTER_FREE,
+  BUFFER_OVERFLOW,
+  BUFFER_UNDERFLOW,
+};
+
+struct scudo_error_report {
+  enum scudo_error_type error_type;
+
+  uintptr_t allocation_address;
+  uintptr_t allocation_size;
+
+  uint32_t allocation_tid;
+  uintptr_t allocation_trace[64];
+
+  uint32_t deallocation_tid;
+  uintptr_t deallocation_trace[64];
+};
+
+struct scudo_error_info {
+  struct scudo_error_report reports[3];
+};
+
+const char *__scudo_get_stack_depot_addr();
+size_t __scudo_get_stack_depot_size();
+
+const char *__scudo_get_region_info_addr();
+size_t __scudo_get_region_info_size();
+
 } // extern "C"
 
 #endif // SCUDO_INTERFACE_H_
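
A sketch of the in-process half of the workflow the comments above describe:
a crash handler inside the crashing process records where the stack depot and
region info live, so that an out-of-process analyzer can copy those byte
ranges (plus a window of memory and tags around the fault address) and feed
them to __scudo_get_error_info(). The ScudoCrashData struct and function name
are hypothetical; note that in this patch the accessor functions are only
defined by the Bionic wrappers.

  #include <stddef.h>

  #include "scudo/interface.h"

  struct ScudoCrashData {
    const char *StackDepotAddr;
    size_t StackDepotSize;
    const char *RegionInfoAddr;
    size_t RegionInfoSize;
  };

  // Called in the crashing process (e.g. from a crash handler) so the
  // analyzer knows which byte ranges to copy out before the process dies.
  void collectScudoCrashData(ScudoCrashData *D) {
    D->StackDepotAddr = __scudo_get_stack_depot_addr();
    D->StackDepotSize = __scudo_get_stack_depot_size();
    D->RegionInfoAddr = __scudo_get_region_info_addr();
    D->RegionInfoSize = __scudo_get_region_info_size();
  }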

diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp
index 0ab96836fc44..69ffdd9a165b 100644
--- a/compiler-rt/lib/scudo/standalone/linux.cpp
+++ b/compiler-rt/lib/scudo/standalone/linux.cpp
@@ -139,6 +139,14 @@ u32 getNumberOfCPUs() {
   return static_cast<u32>(CPU_COUNT(&CPUs));
 }
 
+u32 getThreadID() {
+#if SCUDO_ANDROID
+  return static_cast<u32>(gettid());
+#else
+  return static_cast<u32>(syscall(SYS_gettid));
+#endif
+}
+
 // Blocking is possibly unused if the getrandom block is not compiled in.
 bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) {
   if (!Buffer || !Length || Length > MaxRandomLength)

diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h
index 762713337541..18dae2b8ca41 100644
--- a/compiler-rt/lib/scudo/standalone/memtag.h
+++ b/compiler-rt/lib/scudo/standalone/memtag.h
@@ -21,11 +21,39 @@
 
 namespace scudo {
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(SCUDO_FUZZ)
 
 inline constexpr bool archSupportsMemoryTagging() { return true; }
 inline constexpr uptr archMemoryTagGranuleSize() { return 16; }
 
+inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); }
+
+inline uint8_t extractTag(uptr Ptr) {
+  return (Ptr >> 56) & 0xf;
+}
+
+#else
+
+inline constexpr bool archSupportsMemoryTagging() { return false; }
+
+inline uptr archMemoryTagGranuleSize() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline uptr untagPointer(uptr Ptr) {
+  (void)Ptr;
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline uint8_t extractTag(uptr Ptr) {
+  (void)Ptr;
+  UNREACHABLE("memory tagging not supported");
+}
+
+#endif
+
+#if defined(__aarch64__)
+
 inline bool systemSupportsMemoryTagging() {
 #if defined(ANDROID_EXPERIMENTAL_MTE)
   return getauxval(AT_HWCAP2) & HWCAP2_MTE;
@@ -51,7 +79,19 @@ inline void enableMemoryTagChecksTestOnly() {
   __asm__ __volatile__(".arch_extension mte; msr tco, #0");
 }
 
-inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); }
+class ScopedDisableMemoryTagChecks {
+  size_t PrevTCO;
+
+ public:
+  ScopedDisableMemoryTagChecks() {
+    __asm__ __volatile__(".arch_extension mte; mrs %0, tco; msr tco, #1"
+                         : "=r"(PrevTCO));
+  }
+
+  ~ScopedDisableMemoryTagChecks() {
+    __asm__ __volatile__(".arch_extension mte; msr tco, %0" : : "r"(PrevTCO));
+  }
+};
 
 inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin,
                          uptr *TaggedEnd) {
@@ -154,10 +194,6 @@ inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) {
                        : "memory");
 }
 
-inline uptr tagPointer(uptr UntaggedPtr, uptr Tag) {
-  return UntaggedPtr | (Tag & (0xfUL << 56));
-}
-
 inline uptr loadTag(uptr Ptr) {
   uptr TaggedPtr = Ptr;
   __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]"
@@ -169,8 +205,6 @@ inline uptr loadTag(uptr Ptr) {
 
 #else
 
-inline constexpr bool archSupportsMemoryTagging() { return false; }
-
 inline bool systemSupportsMemoryTagging() {
   UNREACHABLE("memory tagging not supported");
 }
@@ -179,10 +213,6 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() {
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr archMemoryTagGranuleSize() {
-  UNREACHABLE("memory tagging not supported");
-}
-
 inline void disableMemoryTagChecksTestOnly() {
   UNREACHABLE("memory tagging not supported");
 }
@@ -191,10 +221,9 @@ inline void enableMemoryTagChecksTestOnly() {
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr untagPointer(uptr Ptr) {
-  (void)Ptr;
-  UNREACHABLE("memory tagging not supported");
-}
+struct ScopedDisableMemoryTagChecks {
+  ScopedDisableMemoryTagChecks() {}
+};
 
 inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin,
                          uptr *TaggedEnd) {

diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 293b5610311a..29a268098185 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -206,6 +206,15 @@ class SizeClassAllocator32 {
   bool useMemoryTagging() { return false; }
   void disableMemoryTagging() {}
 
+  const char *getRegionInfoArrayAddress() const { return nullptr; }
+  static uptr getRegionInfoArraySize() { return 0; }
+
+  static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
+    (void)RegionInfoData;
+    (void)Ptr;
+    return {};
+  }
+
 private:
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr RegionSize = 1UL << RegionSizeLog;

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index b92ca2d96169..d4767882ba2c 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -209,6 +209,58 @@ class SizeClassAllocator64 {
   }
   void disableMemoryTagging() { UseMemoryTagging = false; }
 
+  const char *getRegionInfoArrayAddress() const {
+    return reinterpret_cast<const char *>(RegionInfoArray);
+  }
+
+  static uptr getRegionInfoArraySize() {
+    return sizeof(RegionInfoArray);
+  }
+
+  static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
+    const RegionInfo *RegionInfoArray =
+        reinterpret_cast<const RegionInfo *>(RegionInfoData);
+    uptr ClassId;
+    uptr MinDistance = -1UL;
+    for (uptr I = 0; I != NumClasses; ++I) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
+      uptr Begin = RegionInfoArray[I].RegionBeg;
+      uptr End = Begin + RegionInfoArray[I].AllocatedUser;
+      if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I))
+        continue;
+      uptr RegionDistance;
+      if (Begin <= Ptr) {
+        if (Ptr < End)
+          RegionDistance = 0;
+        else
+          RegionDistance = Ptr - End;
+      } else {
+        RegionDistance = Begin - Ptr;
+      }
+
+      if (RegionDistance < MinDistance) {
+        MinDistance = RegionDistance;
+        ClassId = I;
+      }
+    }
+
+    BlockInfo B = {};
+    if (MinDistance <= 8192) {
+      B.RegionBegin = RegionInfoArray[ClassId].RegionBeg;
+      B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser;
+      B.BlockSize = SizeClassMap::getSizeByClassId(ClassId);
+      B.BlockBegin =
+          B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) *
+                               sptr(B.BlockSize));
+      while (B.BlockBegin < B.RegionBegin)
+        B.BlockBegin += B.BlockSize;
+      while (B.RegionEnd < B.BlockBegin + B.BlockSize)
+        B.BlockBegin -= B.BlockSize;
+    }
+    return B;
+  }
+
 private:
   static const uptr RegionSize = 1UL << RegionSizeLog;
   static const uptr NumClasses = SizeClassMap::NumClasses;
@@ -231,7 +283,7 @@ class SizeClassAllocator64 {
     u64 LastReleaseAtNs;
   };
 
-  struct alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo {
+  struct UnpaddedRegionInfo {
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
     RegionStats Stats;
@@ -244,13 +296,17 @@ class SizeClassAllocator64 {
     MapPlatformData Data;
     ReleaseToOsInfo ReleaseInfo;
   };
+  struct RegionInfo : UnpaddedRegionInfo {
+    char Padding[SCUDO_CACHE_LINE_SIZE -
+                 (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)];
+  };
   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");
 
   uptr PrimaryBase;
   MapPlatformData Data;
   atomic_s32 ReleaseToOsIntervalMs;
   bool UseMemoryTagging;
-  RegionInfo RegionInfoArray[NumClasses];
+  alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
 
   RegionInfo *getRegionInfo(uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
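
To make the rounding in findNearestBlock() concrete, a tiny standalone
illustration with made-up numbers (the region base, size class and fault
address below are assumptions, not values from the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    uintptr_t RegionBegin = 0x1000, BlockSize = 0x30, FaultPtr = 0x10a0;
    // Same rounding as findNearestBlock() for a fault inside the region;
    // the real code uses signed arithmetic so a fault just outside the
    // region also rounds toward it, and the two while loops then clamp the
    // result into [RegionBegin, RegionEnd - BlockSize].
    uintptr_t BlockBegin =
        RegionBegin + (FaultPtr - RegionBegin) / BlockSize * BlockSize;
    assert(BlockBegin == 0x1090); // block [0x1090, 0x10c0) contains 0x10a0
    return 0;
  }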

diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h
new file mode 100644
index 000000000000..c4d9b277a1c5
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/stack_depot.h
@@ -0,0 +1,144 @@
+//===-- stack_depot.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_STACK_DEPOT_H_
+#define SCUDO_STACK_DEPOT_H_
+
+#include "atomic_helpers.h"
+#include "mutex.h"
+
+namespace scudo {
+
+class MurMur2HashBuilder {
+  static const u32 M = 0x5bd1e995;
+  static const u32 Seed = 0x9747b28c;
+  static const u32 R = 24;
+  u32 H;
+
+ public:
+  explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; }
+  void add(u32 K) {
+    K *= M;
+    K ^= K >> R;
+    K *= M;
+    H *= M;
+    H ^= K;
+  }
+  u32 get() {
+    u32 X = H;
+    X ^= X >> 13;
+    X *= M;
+    X ^= X >> 15;
+    return X;
+  }
+};
+
+class StackDepot {
+  HybridMutex RingEndMu;
+  u32 RingEnd;
+
+  // This data structure stores a stack trace for each allocation and
+  // deallocation when stack trace recording is enabled, that may be looked up
+  // using a hash of the stack trace. The lower bits of the hash are an index
+  // into the Tab array, which stores an index into the Ring array where the
+  // stack traces are stored. As the name implies, Ring is a ring buffer, so a
+  // stack trace may wrap around to the start of the array.
+  //
+  // Each stack trace in Ring is prefixed by a stack trace marker consisting of
+  // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames
+  // and stack trace markers in the case where instruction pointers are 4-byte
+  // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the
+  // size of the stack trace in bits 33-63.
+  //
+  // The insert() function is potentially racy in its accesses to the Tab and
+  // Ring arrays, but find() is resilient to races in the sense that, barring
+  // hash collisions, it will either return the correct stack trace or no stack
+  // trace at all, even if two instances of insert() raced with one another.
+  // This is achieved by re-checking the hash of the stack trace before
+  // returning the trace.
+
+#ifdef SCUDO_FUZZ
+  // Use smaller table sizes for fuzzing in order to reduce input size.
+  static const uptr TabBits = 4;
+#else
+  static const uptr TabBits = 16;
+#endif
+  static const uptr TabSize = 1 << TabBits;
+  static const uptr TabMask = TabSize - 1;
+  atomic_u32 Tab[TabSize];
+
+#ifdef SCUDO_FUZZ
+  static const uptr RingBits = 4;
+#else
+  static const uptr RingBits = 19;
+#endif
+  static const uptr RingSize = 1 << RingBits;
+  static const uptr RingMask = RingSize - 1;
+  atomic_u64 Ring[RingSize];
+
+public:
+  // Insert hash of the stack trace [Begin, End) into the stack depot, and
+  // return the hash.
+  u32 insert(uptr *Begin, uptr *End) {
+    MurMur2HashBuilder B;
+    for (uptr *I = Begin; I != End; ++I)
+      B.add(u32(*I) >> 2);
+    u32 Hash = B.get();
+
+    u32 Pos = Hash & TabMask;
+    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
+    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
+    u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
+    if (Entry == Id)
+      return Hash;
+
+    ScopedLock Lock(RingEndMu);
+    RingPos = RingEnd;
+    atomic_store_relaxed(&Tab[Pos], RingPos);
+    atomic_store_relaxed(&Ring[RingPos], Id);
+    for (uptr *I = Begin; I != End; ++I) {
+      RingPos = (RingPos + 1) & RingMask;
+      atomic_store_relaxed(&Ring[RingPos], *I);
+    }
+    RingEnd = (RingPos + 1) & RingMask;
+    return Hash;
+  }
+
+  // Look up a stack trace by hash. Returns true if successful. The trace may be
+  // accessed via operator[] passing indexes between *RingPosPtr and
+  // *RingPosPtr + *SizePtr.
+  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
+    u32 Pos = Hash & TabMask;
+    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
+    if (RingPos >= RingSize)
+      return false;
+    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
+    u64 HashWithTagBit = (u64(Hash) << 1) | 1;
+    if ((Entry & 0x1ffffffff) != HashWithTagBit)
+      return false;
+    u32 Size = Entry >> 33;
+    if (Size >= RingSize)
+      return false;
+    *RingPosPtr = (RingPos + 1) & RingMask;
+    *SizePtr = Size;
+    MurMur2HashBuilder B;
+    for (uptr I = 0; I != Size; ++I) {
+      RingPos = (RingPos + 1) & RingMask;
+      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
+    }
+    return B.get() == Hash;
+  }
+
+  u64 operator[](uptr RingPos) const {
+    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
+  }
+};
+
+} // namespace scudo
+
+#endif // SCUDO_STACK_DEPOT_H_
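
A small sketch (not from the patch) restating the 64-bit marker word encoding
that insert() writes into Ring and find() checks; the example hash and size
are arbitrary:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Hash = 0xdeadbeef; // 32-bit MurMur2 hash of the trace
    uint64_t Size = 5;          // number of frames stored after the marker
    // Bit 0 is always 1 so a marker cannot be mistaken for a 4-byte aligned
    // return address; bits 1-32 hold the hash, bits 33-63 the trace size.
    uint64_t Id = (Size << 33) | (Hash << 1) | 1;
    assert((Id & 1) == 1);
    assert(((Id >> 1) & 0xffffffffu) == Hash);
    assert((Id >> 33) == Size);
    // find() additionally re-hashes the frames it reads back, so a racing
    // insert() can at worst make a lookup fail, never return a wrong trace
    // (barring hash collisions).
    return 0;
  }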

diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index 5a6c1a8d4087..765d7daa349d 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -219,4 +219,13 @@ INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() {
   SCUDO_ALLOCATOR.disableMemoryTagging();
 }
 
+// Sets whether scudo records stack traces and other metadata for allocations
+// and deallocations. This function only has an effect if the allocator and
+// hardware support memory tagging. The program must be single threaded at the
+// point when the function is called.
+INTERFACE WEAK void
+SCUDO_PREFIX(malloc_set_track_allocation_stacks)(int track) {
+  SCUDO_ALLOCATOR.setTrackAllocationStacks(track);
+}
+
 } // extern "C"
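
A usage sketch for the new wrapper: stack trace recording has to be switched
on while the program is still single threaded, and it only has an effect when
the allocator and hardware support memory tagging. The exported symbol name
depends on SCUDO_PREFIX for the wrapper set being built; the plain "malloc_"
prefix used below is an assumption.

  extern "C" void malloc_set_track_allocation_stacks(int track);

  int main() {
    // Enable recording of allocation/deallocation stack traces before any
    // other threads exist, so later tag-check faults can be attributed.
    malloc_set_track_allocation_stacks(1);
    // ... rest of the program ...
    return 0;
  }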

diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
index 7a012a23bcf1..4298e69b5774 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -48,4 +48,28 @@ static scudo::Allocator<scudo::AndroidSvelteConfig,
 // TODO(kostyak): support both allocators.
 INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
 
+INTERFACE void __scudo_get_error_info(
+    struct scudo_error_info *error_info, uintptr_t fault_addr,
+    const char *stack_depot, const char *region_info, const char *memory,
+    const char *memory_tags, uintptr_t memory_addr, size_t memory_size) {
+  Allocator.getErrorInfo(error_info, fault_addr, stack_depot, region_info,
+                         memory, memory_tags, memory_addr, memory_size);
+}
+
+INTERFACE const char *__scudo_get_stack_depot_addr() {
+  return Allocator.getStackDepotAddress();
+}
+
+INTERFACE size_t __scudo_get_stack_depot_size() {
+  return sizeof(scudo::StackDepot);
+}
+
+INTERFACE const char *__scudo_get_region_info_addr() {
+  return Allocator.getRegionInfoArrayAddress();
+}
+
+INTERFACE size_t __scudo_get_region_info_size() {
+  return Allocator.getRegionInfoArraySize();
+}
+
 #endif // SCUDO_ANDROID && _BIONIC


        

