[compiler-rt] c299d19 - scudo: Add initial memory tagging support.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 13:28:24 PST 2020


Author: Peter Collingbourne
Date: 2020-01-16T13:27:49-08:00
New Revision: c299d1981deaf822dfaa06c791f3158bd6801e20

URL: https://github.com/llvm/llvm-project/commit/c299d1981deaf822dfaa06c791f3158bd6801e20
DIFF: https://github.com/llvm/llvm-project/commit/c299d1981deaf822dfaa06c791f3158bd6801e20.diff

LOG: scudo: Add initial memory tagging support.

When the hardware and operating system support the ARM Memory Tagging
Extension, tag primary allocation granules with a random tag. The granules
on either side of the allocation are tagged with tag 0, which is normally
excluded from the set of tags that may be selected randomly. Memory is
also retagged with a random tag when it is freed, and we opportunistically
reuse the new tag when the block is reused, to reduce overhead. This causes
linear buffer overflows to be caught deterministically, and non-linear buffer
overflows and use-after-free to be caught probabilistically.
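
As an illustration (not part of this commit), the sketch below shows how these
faults would surface to a user of the allocator. It assumes Scudo is the malloc
implementation, MTE tag checks are enabled, the allocation is served by the
primary allocator, and the function name is purely illustrative:

  #include <cstdlib>

  void mte_demo() {
    char *P = static_cast<char *>(malloc(24)); // [P, P+32) tagged with a random tag T != 0
    P[23] = 0xaa;  // OK: within the 16-byte granules covering the allocation
    P[32] = 0xaa;  // linear overflow into a zero-tagged granule -> deterministic fault
    free(P);       // the memory is retagged with a new random tag
    P[0] = 0xaa;   // use-after-free: tag mismatch is likely -> probabilistic fault
  }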

This feature is currently only enabled for the Android allocator
and depends on an experimental Linux kernel branch available here:
https://github.com/pcc/linux/tree/android-experimental-mte

All code that depends on the kernel branch is hidden behind a macro,
ANDROID_EXPERIMENTAL_MTE. This is the same macro that is used by the Android
platform and may only be defined in non-production configurations. When the
userspace interface is finalized, the code will be updated to use the stable
interface and all #ifdef ANDROID_EXPERIMENTAL_MTE guards will be removed.
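
For example, the runtime feature check added in memtag.h compiles down to a
constant false unless the macro is defined (excerpted from the diff below):

  inline bool systemSupportsMemoryTagging() {
  #if defined(ANDROID_EXPERIMENTAL_MTE)
    return getauxval(AT_HWCAP2) & HWCAP2_MTE;
  #else
    return false;
  #endif
  }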

Differential Revision: https://reviews.llvm.org/D70762

Added: 
    compiler-rt/lib/scudo/standalone/memtag.h

Modified: 
    compiler-rt/lib/scudo/standalone/allocator_config.h
    compiler-rt/lib/scudo/standalone/combined.h
    compiler-rt/lib/scudo/standalone/common.h
    compiler-rt/lib/scudo/standalone/linux.cpp
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
    compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
    compiler-rt/lib/scudo/standalone/wrappers_c.inc

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
index 3a5aaae73674..39c962f4408c 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -40,7 +40,9 @@ struct AndroidConfig {
   using SizeClassMap = AndroidSizeClassMap;
 #if SCUDO_CAN_USE_PRIMARY64
   // 1GB regions
-  typedef SizeClassAllocator64<SizeClassMap, 30U> Primary;
+  typedef SizeClassAllocator64<SizeClassMap, 30U,
+                               /*MaySupportMemoryTagging=*/true>
+      Primary;
 #else
   // 512KB regions
   typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index a0b4b2973e96..0c2c9df2a3cb 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -15,6 +15,7 @@
 #include "flags_parser.h"
 #include "interface.h"
 #include "local_cache.h"
+#include "memtag.h"
 #include "quarantine.h"
 #include "report.h"
 #include "secondary.h"
@@ -195,6 +196,13 @@ class Allocator {
     TSD->Cache.destroy(&Stats);
   }
 
+  ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) {
+    if (Primary.SupportsMemoryTagging)
+      return reinterpret_cast<void *>(
+          untagPointer(reinterpret_cast<uptr>(Ptr)));
+    return Ptr;
+  }
+
   NOINLINE void *allocate(uptr Size, Chunk::Origin Origin,
                           uptr Alignment = MinAlignment,
                           bool ZeroContents = false) {
@@ -237,7 +245,7 @@ class Allocator {
 
     void *Block;
     uptr ClassId;
-    uptr BlockEnd;
+    uptr SecondaryBlockEnd;
     if (LIKELY(PrimaryT::canAllocate(NeededSize))) {
       ClassId = SizeClassMap::getClassIdBySize(NeededSize);
       DCHECK_NE(ClassId, 0U);
@@ -248,8 +256,8 @@ class Allocator {
         TSD->unlock();
     } else {
       ClassId = 0;
-      Block =
-          Secondary.allocate(NeededSize, Alignment, &BlockEnd, ZeroContents);
+      Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd,
+                                 ZeroContents);
     }
 
     if (UNLIKELY(!Block)) {
@@ -258,16 +266,81 @@ class Allocator {
       reportOutOfMemory(NeededSize);
     }
 
-    // We only need to zero the contents for Primary backed allocations. This
-    // condition is not necessarily unlikely, but since memset is costly, we
-    // might as well mark it as such.
-    if (UNLIKELY(ZeroContents && ClassId))
-      memset(Block, 0, PrimaryT::getSizeByClassId(ClassId));
-
-    const uptr UnalignedUserPtr =
-        reinterpret_cast<uptr>(Block) + Chunk::getHeaderSize();
+    const uptr BlockUptr = reinterpret_cast<uptr>(Block);
+    const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize();
     const uptr UserPtr = roundUpTo(UnalignedUserPtr, Alignment);
 
+    void *Ptr = reinterpret_cast<void *>(UserPtr);
+    void *TaggedPtr = Ptr;
+    if (ClassId) {
+      // We only need to zero or tag the contents for Primary backed
+      // allocations. We only set tags for primary allocations in order to avoid
+      // faulting potentially large numbers of pages for large secondary
+      // allocations. We assume that guard pages are enough to protect these
+      // allocations.
+      //
+      // FIXME: When the kernel provides a way to set the background tag of a
+      // mapping, we should be able to tag secondary allocations as well.
+      //
+      // When memory tagging is enabled, zeroing the contents is done as part of
+      // setting the tag.
+      if (UNLIKELY(useMemoryTagging())) {
+        uptr PrevUserPtr;
+        Chunk::UnpackedHeader Header;
+        const uptr BlockEnd = BlockUptr + PrimaryT::getSizeByClassId(ClassId);
+        // If possible, try to reuse the UAF tag that was set by deallocate().
+        // For simplicity, only reuse tags if we have the same start address as
+        // the previous allocation. This handles the majority of cases since
+        // most allocations will not be more aligned than the minimum alignment.
+        //
+        // We need to handle situations involving reclaimed chunks, and retag
+        // the reclaimed portions if necessary. In the case where the chunk is
+        // fully reclaimed, the chunk's header will be zero, which will trigger
+        // the code path for new mappings and invalid chunks that prepares the
+        // chunk from scratch. There are three possibilities for partial
+        // reclaiming:
+        //
+        // (1) Header was reclaimed, data was partially reclaimed.
+        // (2) Header was not reclaimed, all data was reclaimed (e.g. because
+        //     data started on a page boundary).
+        // (3) Header was not reclaimed, data was partially reclaimed.
+        //
+        // Case (1) will be handled in the same way as for full reclaiming,
+        // since the header will be zero.
+        //
+        // We can detect case (2) by loading the tag from the start
+        // of the chunk. If it is zero, it means that either all data was
+        // reclaimed (since we never use zero as the chunk tag), or that the
+        // previous allocation was of size zero. Either way, we need to prepare
+        // a new chunk from scratch.
+        //
+        // We can detect case (3) by moving to the next page (if covered by the
+        // chunk) and loading the tag of its first granule. If it is zero, it
+        // means that all following pages may need to be retagged. On the other
+        // hand, if it is nonzero, we can assume that all following pages are
+        // still tagged, according to the logic that if any of the pages
+        // following the next page were reclaimed, the next page would have been
+        // reclaimed as well.
+        uptr TaggedUserPtr;
+        if (getChunkFromBlock(BlockUptr, &PrevUserPtr, &Header) &&
+            PrevUserPtr == UserPtr &&
+            (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) {
+          uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes;
+          const uptr NextPage = roundUpTo(TaggedUserPtr, getPageSizeCached());
+          if (NextPage < PrevEnd && loadTag(NextPage) != NextPage)
+            PrevEnd = NextPage;
+          TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
+          resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd);
+        } else {
+          TaggedPtr = prepareTaggedChunk(Ptr, Size, BlockEnd);
+        }
+      } else if (UNLIKELY(ZeroContents)) {
+        // This condition is not necessarily unlikely, but since memset is
+        // costly, we might as well mark it as such.
+        memset(Block, 0, PrimaryT::getSizeByClassId(ClassId));
+      }
+    }
+
     Chunk::UnpackedHeader Header = {};
     if (UNLIKELY(UnalignedUserPtr != UserPtr)) {
       const uptr Offset = UserPtr - UnalignedUserPtr;
@@ -283,15 +356,15 @@ class Allocator {
     Header.ClassId = ClassId & Chunk::ClassIdMask;
     Header.State = Chunk::State::Allocated;
     Header.Origin = Origin & Chunk::OriginMask;
-    Header.SizeOrUnusedBytes = (ClassId ? Size : BlockEnd - (UserPtr + Size)) &
-                               Chunk::SizeOrUnusedBytesMask;
-    void *Ptr = reinterpret_cast<void *>(UserPtr);
+    Header.SizeOrUnusedBytes =
+        (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) &
+        Chunk::SizeOrUnusedBytesMask;
     Chunk::storeHeader(Cookie, Ptr, &Header);
 
     if (&__scudo_allocate_hook)
-      __scudo_allocate_hook(Ptr, Size);
+      __scudo_allocate_hook(TaggedPtr, Size);
 
-    return Ptr;
+    return TaggedPtr;
   }
 
   NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0,
@@ -319,6 +392,8 @@ class Allocator {
     if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)))
       reportMisalignedPointer(AllocatorAction::Deallocating, Ptr);
 
+    Ptr = untagPointerMaybe(Ptr);
+
     Chunk::UnpackedHeader Header;
     Chunk::loadHeader(Cookie, Ptr, &Header);
 
@@ -346,6 +421,9 @@ class Allocator {
   void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) {
     initThreadMaybe();
 
+    void *OldTaggedPtr = OldPtr;
+    OldPtr = untagPointerMaybe(OldPtr);
+
     // The following cases are handled by the C wrappers.
     DCHECK_NE(OldPtr, nullptr);
     DCHECK_NE(NewSize, 0);
@@ -405,7 +483,11 @@ class Allocator {
                      : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) &
             Chunk::SizeOrUnusedBytesMask;
         Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader);
-        return OldPtr;
+        if (UNLIKELY(ClassId && useMemoryTagging()))
+          resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize,
+                            reinterpret_cast<uptr>(OldTaggedPtr) + NewSize,
+                            BlockEnd);
+        return OldTaggedPtr;
       }
     }
 
@@ -416,7 +498,7 @@ class Allocator {
     void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment);
     if (NewPtr) {
       const uptr OldSize = getSize(OldPtr, &OldHeader);
-      memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
+      memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize));
       quarantineOrDeallocateChunk(OldPtr, &OldHeader, OldSize);
     }
     return NewPtr;
@@ -489,8 +571,13 @@ class Allocator {
       uptr Chunk;
       Chunk::UnpackedHeader Header;
       if (getChunkFromBlock(Block, &Chunk, &Header) &&
-          Header.State == Chunk::State::Allocated)
-        Callback(Chunk, getSize(reinterpret_cast<void *>(Chunk), &Header), Arg);
+          Header.State == Chunk::State::Allocated) {
+        uptr TaggedChunk = Chunk;
+        if (useMemoryTagging())
+          TaggedChunk = loadTag(Chunk);
+        Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header),
+                 Arg);
+      }
     };
     Primary.iterateOverBlocks(Lambda);
     Secondary.iterateOverBlocks(Lambda);
@@ -519,6 +606,7 @@ class Allocator {
       return GuardedAlloc.getSize(Ptr);
 #endif // GWP_ASAN_HOOKS
 
+    Ptr = untagPointerMaybe(const_cast<void *>(Ptr));
     Chunk::UnpackedHeader Header;
     Chunk::loadHeader(Cookie, Ptr, &Header);
     // Getting the usable size of a chunk only makes sense if it's allocated.
@@ -543,11 +631,16 @@ class Allocator {
 #endif // GWP_ASAN_HOOKS
     if (!Ptr || !isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment))
       return false;
+    Ptr = untagPointerMaybe(const_cast<void *>(Ptr));
     Chunk::UnpackedHeader Header;
     return Chunk::isValid(Cookie, Ptr, &Header) &&
            Header.State == Chunk::State::Allocated;
   }
 
+  bool useMemoryTagging() { return Primary.useMemoryTagging(); }
+
+  void disableMemoryTagging() { Primary.disableMemoryTagging(); }
+
 private:
   using SecondaryT = typename Params::Secondary;
   typedef typename PrimaryT::SizeClassMap SizeClassMap;
@@ -561,6 +654,9 @@ class Allocator {
 
   static_assert(MinAlignment >= sizeof(Chunk::PackedHeader),
                 "Minimal alignment must at least cover a chunk header.");
+  static_assert(!PrimaryT::SupportsMemoryTagging ||
+                    MinAlignment >= archMemoryTagGranuleSize(),
+                "");
 
   static const u32 BlockMarker = 0x44554353U;
 
@@ -638,6 +734,10 @@ class Allocator {
   void quarantineOrDeallocateChunk(void *Ptr, Chunk::UnpackedHeader *Header,
                                    uptr Size) {
     Chunk::UnpackedHeader NewHeader = *Header;
+    if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) {
+      uptr TaggedBegin, TaggedEnd;
+      setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd);
+    }
     // If the quarantine is disabled, the actual size of a chunk is 0 or larger
     // than the maximum allowed, we return a chunk directly to the backend.
     // Logical Or can be short-circuited, which introduces unnecessary

diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
index a76eb6bbc164..a700eb5eaa3f 100644
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -142,6 +142,7 @@ bool getRandom(void *Buffer, uptr Length, bool Blocking = false);
 #define MAP_ALLOWNOMEM (1U << 0)
 #define MAP_NOACCESS (1U << 1)
 #define MAP_RESIZABLE (1U << 2)
+#define MAP_MEMTAG (1U << 3)
 
 // Our platform memory mapping use is restricted to 3 scenarios:
 // - reserve memory at a random address (MAP_NOACCESS);

diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp
index 8266a528f42c..3c120845c2d8 100644
--- a/compiler-rt/lib/scudo/standalone/linux.cpp
+++ b/compiler-rt/lib/scudo/standalone/linux.cpp
@@ -35,6 +35,10 @@
 #define ANDROID_PR_SET_VMA_ANON_NAME 0
 #endif
 
+#ifdef ANDROID_EXPERIMENTAL_MTE
+#include <bionic/mte_kernel.h>
+#endif
+
 namespace scudo {
 
 uptr getPageSize() { return static_cast<uptr>(sysconf(_SC_PAGESIZE)); }
@@ -50,6 +54,10 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags,
     MmapProt = PROT_NONE;
   } else {
     MmapProt = PROT_READ | PROT_WRITE;
+#if defined(__aarch64__) && defined(ANDROID_EXPERIMENTAL_MTE)
+    if (Flags & MAP_MEMTAG)
+      MmapProt |= PROT_MTE;
+#endif
   }
   if (Addr) {
     // Currently no scenario for a noaccess mapping with a fixed address.

diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h
new file mode 100644
index 000000000000..762713337541
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/memtag.h
@@ -0,0 +1,231 @@
+//===-- memtag.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_MEMTAG_H_
+#define SCUDO_MEMTAG_H_
+
+#include "internal_defs.h"
+
+#if SCUDO_LINUX
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#if defined(ANDROID_EXPERIMENTAL_MTE)
+#include <bionic/mte_kernel.h>
+#endif
+#endif
+
+namespace scudo {
+
+#if defined(__aarch64__)
+
+inline constexpr bool archSupportsMemoryTagging() { return true; }
+inline constexpr uptr archMemoryTagGranuleSize() { return 16; }
+
+inline bool systemSupportsMemoryTagging() {
+#if defined(ANDROID_EXPERIMENTAL_MTE)
+  return getauxval(AT_HWCAP2) & HWCAP2_MTE;
+#else
+  return false;
+#endif
+}
+
+inline bool systemDetectsMemoryTagFaultsTestOnly() {
+#if defined(ANDROID_EXPERIMENTAL_MTE)
+  return (prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0) & PR_MTE_TCF_MASK) !=
+         PR_MTE_TCF_NONE;
+#else
+  return false;
+#endif
+}
+
+inline void disableMemoryTagChecksTestOnly() {
+  __asm__ __volatile__(".arch_extension mte; msr tco, #1");
+}
+
+inline void enableMemoryTagChecksTestOnly() {
+  __asm__ __volatile__(".arch_extension mte; msr tco, #0");
+}
+
+inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); }
+
+inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin,
+                         uptr *TaggedEnd) {
+  void *End;
+  __asm__ __volatile__(
+      R"(
+    .arch_extension mte
+
+    // Set a random tag for Ptr in TaggedPtr. This needs to happen even if
+    // Size = 0 so that TaggedPtr ends up pointing at a valid address.
+    irg %[TaggedPtr], %[Ptr]
+    mov %[Cur], %[TaggedPtr]
+
+    // Skip the loop if Size = 0. We don't want to do any tagging in this case.
+    cbz %[Size], 2f
+
+    // Set the memory tag of the region
+    // [TaggedPtr, TaggedPtr + roundUpTo(Size, 16))
+    // to the pointer tag stored in TaggedPtr.
+    add %[End], %[TaggedPtr], %[Size]
+
+  1:
+    stzg %[Cur], [%[Cur]], #16
+    cmp %[Cur], %[End]
+    b.lt 1b
+
+  2:
+  )"
+      : [ TaggedPtr ] "=&r"(*TaggedBegin), [ Cur ] "=&r"(*TaggedEnd),
+        [ End ] "=&r"(End)
+      : [ Ptr ] "r"(Ptr), [ Size ] "r"(Size)
+      : "memory");
+}
+
+inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) {
+  // Prepare the granule before the chunk to store the chunk header by setting
+  // its tag to 0. Normally its tag will already be 0, but in the case where a
+  // chunk holding a low alignment allocation is reused for a higher alignment
+  // allocation, the chunk may already have a non-zero tag from the previous
+  // allocation.
+  __asm__ __volatile__(".arch_extension mte; stg %0, [%0, #-16]"
+                       :
+                       : "r"(Ptr)
+                       : "memory");
+
+  uptr TaggedBegin, TaggedEnd;
+  setRandomTag(Ptr, Size, &TaggedBegin, &TaggedEnd);
+
+  // Finally, set the tag of the granule past the end of the allocation to 0,
+  // to catch linear overflows even if a previous larger allocation used the
+  // same block and tag. Only do this if the granule past the end is in our
+  // block, because this would otherwise lead to a SEGV if the allocation
+  // covers the entire block and our block is at the end of a mapping. The tag
+  // of the next block's header granule will be set to 0, so it will serve the
+  // purpose of catching linear overflows in this case.
+  uptr UntaggedEnd = untagPointer(TaggedEnd);
+  if (UntaggedEnd != BlockEnd)
+    __asm__ __volatile__(".arch_extension mte; stg %0, [%0]"
+                         :
+                         : "r"(UntaggedEnd)
+                         : "memory");
+  return reinterpret_cast<void *>(TaggedBegin);
+}
+
+inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) {
+  uptr RoundOldPtr = roundUpTo(OldPtr, 16);
+  if (RoundOldPtr >= NewPtr) {
+    // If the allocation is shrinking we just need to set the tag past the end
+    // of the allocation to 0. See explanation in prepareTaggedChunk above.
+    uptr RoundNewPtr = untagPointer(roundUpTo(NewPtr, 16));
+    if (RoundNewPtr != BlockEnd)
+      __asm__ __volatile__(".arch_extension mte; stg %0, [%0]"
+                           :
+                           : "r"(RoundNewPtr)
+                           : "memory");
+    return;
+  }
+
+  __asm__ __volatile__(R"(
+    .arch_extension mte
+
+    // Set the memory tag of the region
+    // [roundUpTo(OldPtr, 16), roundUpTo(NewPtr, 16))
+    // to the pointer tag stored in OldPtr.
+  1:
+    stzg %[Cur], [%[Cur]], #16
+    cmp %[Cur], %[End]
+    b.lt 1b
+
+    // Finally, set the tag of the granule past the end of the allocation to 0.
+    and %[Cur], %[Cur], #(1 << 56) - 1
+    cmp %[Cur], %[BlockEnd]
+    b.eq 2f
+    stg %[Cur], [%[Cur]]
+
+  2:
+  )"
+                       : [ Cur ] "+&r"(RoundOldPtr), [ End ] "+&r"(NewPtr)
+                       : [ BlockEnd ] "r"(BlockEnd)
+                       : "memory");
+}
+
+inline uptr tagPointer(uptr UntaggedPtr, uptr Tag) {
+  return UntaggedPtr | (Tag & (0xfUL << 56));
+}
+
+inline uptr loadTag(uptr Ptr) {
+  uptr TaggedPtr = Ptr;
+  __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]"
+                       : "+r"(TaggedPtr)
+                       :
+                       : "memory");
+  return TaggedPtr;
+}
+
+#else
+
+inline constexpr bool archSupportsMemoryTagging() { return false; }
+
+inline bool systemSupportsMemoryTagging() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline bool systemDetectsMemoryTagFaultsTestOnly() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline uptr archMemoryTagGranuleSize() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline void disableMemoryTagChecksTestOnly() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline void enableMemoryTagChecksTestOnly() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline uptr untagPointer(uptr Ptr) {
+  (void)Ptr;
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline void setRandomTag(void *Ptr, uptr Size, uptr *TaggedBegin,
+                         uptr *TaggedEnd) {
+  (void)Ptr;
+  (void)Size;
+  (void)TaggedBegin;
+  (void)TaggedEnd;
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline void *prepareTaggedChunk(void *Ptr, uptr Size, uptr BlockEnd) {
+  (void)Ptr;
+  (void)Size;
+  (void)BlockEnd;
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr BlockEnd) {
+  (void)OldPtr;
+  (void)NewPtr;
+  (void)BlockEnd;
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline uptr loadTag(uptr Ptr) {
+  (void)Ptr;
+  UNREACHABLE("memory tagging not supported");
+}
+
+#endif
+
+} // namespace scudo
+
+#endif

diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index e296a78778e0..152df2664842 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -46,6 +46,7 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 {
   typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
+  static const bool SupportsMemoryTagging = false;
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
@@ -186,6 +187,9 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 {
     return TotalReleasedBytes;
   }
 
+  bool useMemoryTagging() { return false; }
+  void disableMemoryTagging() {}
+
 private:
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr RegionSize = 1UL << RegionSizeLog;

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index ef02f0b772d6..243460f493b0 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -13,6 +13,7 @@
 #include "common.h"
 #include "list.h"
 #include "local_cache.h"
+#include "memtag.h"
 #include "release.h"
 #include "stats.h"
 #include "string_utils.h"
@@ -38,12 +39,18 @@ namespace scudo {
 // The memory used by this allocator is never unmapped, but can be partially
 // released if the platform allows for it.
 
-template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
+template <class SizeClassMapT, uptr RegionSizeLog,
+          bool MaySupportMemoryTagging = false>
+class SizeClassAllocator64 {
 public:
   typedef SizeClassMapT SizeClassMap;
-  typedef SizeClassAllocator64<SizeClassMap, RegionSizeLog> ThisT;
+  typedef SizeClassAllocator64<SizeClassMap, RegionSizeLog,
+                               MaySupportMemoryTagging>
+      ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
+  static const bool SupportsMemoryTagging =
+      MaySupportMemoryTagging && archSupportsMemoryTagging();
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
@@ -85,6 +92,9 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
       Region->RandState = getRandomU32(&Seed);
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
+
+    if (SupportsMemoryTagging)
+      UseMemoryTagging = systemSupportsMemoryTagging();
   }
   void init(s32 ReleaseToOsInterval) {
     memset(this, 0, sizeof(*this));
@@ -189,6 +199,11 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
     return TotalReleasedBytes;
   }
 
+  bool useMemoryTagging() const {
+    return SupportsMemoryTagging && UseMemoryTagging;
+  }
+  void disableMemoryTagging() { UseMemoryTagging = false; }
+
 private:
   static const uptr RegionSize = 1UL << RegionSizeLog;
   static const uptr NumClasses = SizeClassMap::NumClasses;
@@ -230,6 +245,7 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
   RegionInfo *RegionInfoArray;
   MapPlatformData Data;
   s32 ReleaseToOsIntervalMs;
+  bool UseMemoryTagging;
 
   RegionInfo *getRegionInfo(uptr ClassId) const {
     DCHECK_LT(ClassId, NumClasses);
@@ -294,7 +310,9 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
         Region->Data = Data;
       if (UNLIKELY(!map(reinterpret_cast<void *>(RegionBeg + MappedUser),
                         UserMapSize, "scudo:primary",
-                        MAP_ALLOWNOMEM | MAP_RESIZABLE, &Region->Data)))
+                        MAP_ALLOWNOMEM | MAP_RESIZABLE |
+                            (useMemoryTagging() ? MAP_MEMTAG : 0),
+                        &Region->Data)))
         return nullptr;
       Region->MappedUser += UserMapSize;
       C->getStats().add(StatMapped, UserMapSize);

diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index fec5f864aeb7..b4ce4d0ef717 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -22,6 +22,51 @@ static bool Ready = false;
 
 static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc;
 
+static void disableDebuggerdMaybe() {
+#if SCUDO_ANDROID
+  // Disable the debuggerd signal handler on Android, without this we can end
+  // up spending a significant amount of time creating tombstones.
+  signal(SIGSEGV, SIG_DFL);
+#endif
+}
+
+template <class AllocatorT>
+bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size,
+                        scudo::uptr Alignment) {
+  if (!Allocator->useMemoryTagging() ||
+      !scudo::systemDetectsMemoryTagFaultsTestOnly())
+    return false;
+
+  const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG;
+  if (Alignment < MinAlignment)
+    Alignment = MinAlignment;
+  const scudo::uptr NeededSize =
+      scudo::roundUpTo(Size, MinAlignment) +
+      ((Alignment > MinAlignment) ? Alignment : scudo::Chunk::getHeaderSize());
+  return AllocatorT::PrimaryT::canAllocate(NeededSize);
+}
+
+template <class AllocatorT>
+void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size,
+                             scudo::uptr Alignment) {
+  if (!isTaggedAllocation(Allocator, Size, Alignment))
+    return;
+
+  Size = scudo::roundUpTo(Size, scudo::archMemoryTagGranuleSize());
+  EXPECT_DEATH(
+      {
+        disableDebuggerdMaybe();
+        reinterpret_cast<char *>(P)[-1] = 0xaa;
+      },
+      "");
+  EXPECT_DEATH(
+      {
+        disableDebuggerdMaybe();
+        reinterpret_cast<char *>(P)[Size] = 0xaa;
+      },
+      "");
+}
+
 template <class Config> static void testAllocator() {
   using AllocatorT = scudo::Allocator<Config>;
   auto Deleter = [](AllocatorT *A) {
@@ -56,6 +101,7 @@ template <class Config> static void testAllocator() {
         EXPECT_TRUE(scudo::isAligned(reinterpret_cast<scudo::uptr>(P), Align));
         EXPECT_LE(Size, Allocator->getUsableSize(P));
         memset(P, 0xaa, Size);
+        checkMemoryTaggingMaybe(Allocator.get(), P, Size, Align);
         Allocator->deallocate(P, Origin, Size);
       }
     }
@@ -83,7 +129,8 @@ template <class Config> static void testAllocator() {
   bool Found = false;
   for (scudo::uptr I = 0; I < 1024U && !Found; I++) {
     void *P = Allocator->allocate(NeedleSize, Origin);
-    if (P == NeedleP)
+    if (Allocator->untagPointerMaybe(P) ==
+        Allocator->untagPointerMaybe(NeedleP))
       Found = true;
     Allocator->deallocate(P, Origin);
   }
@@ -120,6 +167,7 @@ template <class Config> static void testAllocator() {
     EXPECT_EQ(NewP, P);
     for (scudo::uptr I = 0; I < DataSize - 32; I++)
       EXPECT_EQ((reinterpret_cast<char *>(NewP))[I], Marker);
+    checkMemoryTaggingMaybe(Allocator.get(), NewP, NewSize, 0);
   }
   Allocator->deallocate(P, Origin);
 
@@ -148,6 +196,58 @@ template <class Config> static void testAllocator() {
 
   Allocator->releaseToOS();
 
+  if (Allocator->useMemoryTagging() &&
+      scudo::systemDetectsMemoryTagFaultsTestOnly()) {
+    // Check that use-after-free is detected.
+    for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) {
+      const scudo::uptr Size = 1U << SizeLog;
+      if (!isTaggedAllocation(Allocator.get(), Size, 1))
+        continue;
+      // UAF detection is probabilistic, so we repeat the test up to 256 times
+      // if necessary. With 15 possible tags this means a 1 in 15^256 chance of
+      // a false positive.
+      EXPECT_DEATH(
+          {
+            disableDebuggerdMaybe();
+            for (unsigned I = 0; I != 256; ++I) {
+              void *P = Allocator->allocate(Size, Origin);
+              Allocator->deallocate(P, Origin);
+              reinterpret_cast<char *>(P)[0] = 0xaa;
+            }
+          },
+          "");
+      EXPECT_DEATH(
+          {
+            disableDebuggerdMaybe();
+            for (unsigned I = 0; I != 256; ++I) {
+              void *P = Allocator->allocate(Size, Origin);
+              Allocator->deallocate(P, Origin);
+              reinterpret_cast<char *>(P)[Size - 1] = 0xaa;
+            }
+          },
+          "");
+    }
+
+    // Check that disabling memory tagging works correctly.
+    void *P = Allocator->allocate(2048, Origin);
+    EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 0xaa, "");
+    scudo::disableMemoryTagChecksTestOnly();
+    Allocator->disableMemoryTagging();
+    reinterpret_cast<char *>(P)[2048] = 0xaa;
+    Allocator->deallocate(P, Origin);
+
+    P = Allocator->allocate(2048, Origin);
+    EXPECT_EQ(Allocator->untagPointerMaybe(P), P);
+    reinterpret_cast<char *>(P)[2048] = 0xaa;
+    Allocator->deallocate(P, Origin);
+
+    Allocator->releaseToOS();
+
+    // Disabling memory tag checks may interfere with subsequent tests.
+    // Re-enable them now.
+    scudo::enableMemoryTagChecksTestOnly();
+  }
+
   scudo::uptr BufferSize = 8192;
   std::vector<char> Buffer(BufferSize);
   scudo::uptr ActualSize = Allocator->getStats(Buffer.data(), BufferSize);

diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 64b625e79bf2..010bf84490e0 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -58,6 +58,7 @@ TEST(ScudoPrimaryTest, BasicPrimary) {
   testPrimary<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
 #endif
   testPrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
+  testPrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>();
 }
 
 // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes.
@@ -143,6 +144,7 @@ TEST(ScudoPrimaryTest, PrimaryIterate) {
   testIteratePrimary<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
 #endif
   testIteratePrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
+  testIteratePrimary<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>();
 }
 
 static std::mutex Mutex;
@@ -202,6 +204,7 @@ TEST(ScudoPrimaryTest, PrimaryThreaded) {
   testPrimaryThreaded<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
 #endif
   testPrimaryThreaded<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
+  testPrimaryThreaded<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>();
 }
 
 // Through a simple allocation that spans two pages, verify that releaseToOS
@@ -232,4 +235,5 @@ TEST(ScudoPrimaryTest, ReleaseToOS) {
   testReleaseToOS<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
 #endif
   testReleaseToOS<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
+  testReleaseToOS<scudo::SizeClassAllocator64<SizeClassMap, 24U, true>>();
 }

diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index 2fd709eaa1f6..d054a616539b 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -184,4 +184,12 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
   return 0;
 }
 
+// Disable memory tagging for the heap. The caller must disable memory tag
+// checks globally (e.g. by clearing TCF0 on aarch64) before calling this
+// function, and may not re-enable them after calling the function. The program
+// must be single threaded at the point when the function is called.
+INTERFACE WEAK void SCUDO_PREFIX(malloc_disable_memory_tagging)() {
+  SCUDO_ALLOCATOR.disableMemoryTagging();
+}
+
 } // extern "C"
