[compiler-rt] 7bd75b6 - scudo: Add an API for disabling memory initialization per-thread.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 18 12:11:53 PDT 2020


Author: Peter Collingbourne
Date: 2020-09-18T12:04:27-07:00
New Revision: 7bd75b630144ec639dbbf7bcb2797f48380b953b

URL: https://github.com/llvm/llvm-project/commit/7bd75b630144ec639dbbf7bcb2797f48380b953b
DIFF: https://github.com/llvm/llvm-project/commit/7bd75b630144ec639dbbf7bcb2797f48380b953b.diff

LOG: scudo: Add an API for disabling memory initialization per-thread.

Here "memory initialization" refers to zero- or pattern-init on
non-MTE hardware, or (where possible to avoid) memory tagging on MTE
hardware. With shared TSD the per-thread memory initialization state
is stored in bit 0 of the TLS slot, similar to PointerIntPair in LLVM.

Differential Revision: https://reviews.llvm.org/D87739
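
For readers unfamiliar with the PointerIntPair-style packing mentioned
above, here is a minimal, illustrative C++ sketch of the idea in
isolation (names are stand-ins, not Scudo's API; the real implementation
is in the tsd_shared.h hunk further down): because the TSD object is at
least 2-byte aligned, the low bit of the pointer stored in the TLS slot
is always zero and can carry the per-thread DisableMemInit flag.

    // Illustrative sketch only; simplified stand-ins for Scudo's types.
    #include <cstdint>

    struct TSD { long Cache; };         // stand-in for scudo::TSD<Allocator>
    static_assert(alignof(TSD) >= 2, "low bit of the pointer must be free");

    uintptr_t Slot = 0;                 // stand-in for the platform TLS slot

    void setCurrentTSD(TSD *P) {
      Slot = (Slot & 1) | reinterpret_cast<uintptr_t>(P);    // keep flag bit
    }
    TSD *getCurrentTSD() {
      return reinterpret_cast<TSD *>(Slot & ~uintptr_t(1));  // strip flag bit
    }
    void setDisableMemInit(bool B) {
      Slot = (Slot & ~uintptr_t(1)) | static_cast<uintptr_t>(B);
    }
    bool getDisableMemInit() { return Slot & 1; }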

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/chunk.h
    compiler-rt/lib/scudo/standalone/combined.h
    compiler-rt/lib/scudo/standalone/common.h
    compiler-rt/lib/scudo/standalone/include/scudo/interface.h
    compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
    compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
    compiler-rt/lib/scudo/standalone/tsd_exclusive.h
    compiler-rt/lib/scudo/standalone/tsd_shared.h
    compiler-rt/lib/scudo/standalone/wrappers_c.inc

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h
index f4d68b3ac6c4..69b8e1b12a91 100644
--- a/compiler-rt/lib/scudo/standalone/chunk.h
+++ b/compiler-rt/lib/scudo/standalone/chunk.h
@@ -65,7 +65,8 @@ typedef u64 PackedHeader;
 struct UnpackedHeader {
   uptr ClassId : 8;
   u8 State : 2;
-  u8 Origin : 2;
+  // Origin if State == Allocated, or WasZeroed otherwise.
+  u8 OriginOrWasZeroed : 2;
   uptr SizeOrUnusedBytes : 20;
   uptr Offset : 16;
   uptr Checksum : 16;

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 465e581cf513..8be6ae820c89 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -275,8 +275,10 @@ class Allocator {
     }
 #endif // GWP_ASAN_HOOKS
 
-    const FillContentsMode FillContents =
-        ZeroContents ? ZeroFill : Options.FillContents;
+    const FillContentsMode FillContents = ZeroContents ? ZeroFill
+                                          : TSDRegistry.getDisableMemInit()
+                                              ? NoFill
+                                              : Options.FillContents;
 
     if (UNLIKELY(Alignment > MaxAlignment)) {
       if (Options.MayReturnNull)
@@ -405,7 +407,17 @@ class Allocator {
             PrevEnd = NextPage;
           TaggedPtr = reinterpret_cast<void *>(TaggedUserPtr);
           resizeTaggedChunk(PrevEnd, TaggedUserPtr + Size, BlockEnd);
-          if (Size) {
+          if (UNLIKELY(FillContents != NoFill && !Header.OriginOrWasZeroed)) {
+            // If an allocation needs to be zeroed (i.e. calloc) we can normally
+            // avoid zeroing the memory now since we can rely on memory having
+            // been zeroed on free, as this is normally done while setting the
+            // UAF tag. But if tagging was disabled per-thread when the memory
+            // was freed, it would not have been retagged and thus zeroed, and
+            // therefore it needs to be zeroed now.
+            memset(TaggedPtr, 0,
+                   Min(Size, roundUpTo(PrevEnd - TaggedUserPtr,
+                                       archMemoryTagGranuleSize())));
+          } else if (Size) {
             // Clear any stack metadata that may have previously been stored in
             // the chunk data.
             memset(TaggedPtr, 0, archMemoryTagGranuleSize());
@@ -438,7 +450,7 @@ class Allocator {
     }
     Header.ClassId = ClassId & Chunk::ClassIdMask;
     Header.State = Chunk::State::Allocated;
-    Header.Origin = Origin & Chunk::OriginMask;
+    Header.OriginOrWasZeroed = Origin & Chunk::OriginMask;
     Header.SizeOrUnusedBytes =
         (ClassId ? Size : SecondaryBlockEnd - (UserPtr + Size)) &
         Chunk::SizeOrUnusedBytesMask;
@@ -483,12 +495,12 @@ class Allocator {
     if (UNLIKELY(Header.State != Chunk::State::Allocated))
       reportInvalidChunkState(AllocatorAction::Deallocating, Ptr);
     if (Options.DeallocTypeMismatch) {
-      if (Header.Origin != Origin) {
+      if (Header.OriginOrWasZeroed != Origin) {
         // With the exception of memalign'd chunks, which can still be free'd.
-        if (UNLIKELY(Header.Origin != Chunk::Origin::Memalign ||
+        if (UNLIKELY(Header.OriginOrWasZeroed != Chunk::Origin::Memalign ||
                      Origin != Chunk::Origin::Malloc))
           reportDeallocTypeMismatch(AllocatorAction::Deallocating, Ptr,
-                                    Header.Origin, Origin);
+                                    Header.OriginOrWasZeroed, Origin);
       }
     }
 
@@ -541,9 +553,10 @@ class Allocator {
     // applications think that it is OK to realloc a memalign'ed pointer, which
     // will trigger this check. It really isn't.
     if (Options.DeallocTypeMismatch) {
-      if (UNLIKELY(OldHeader.Origin != Chunk::Origin::Malloc))
+      if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc))
         reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr,
-                                  OldHeader.Origin, Chunk::Origin::Malloc);
+                                  OldHeader.OriginOrWasZeroed,
+                                  Chunk::Origin::Malloc);
     }
 
     void *BlockBegin = getBlockBegin(OldPtr, &OldHeader);
@@ -1017,14 +1030,17 @@ class Allocator {
     Chunk::UnpackedHeader NewHeader = *Header;
     if (UNLIKELY(NewHeader.ClassId && useMemoryTagging())) {
       u8 PrevTag = extractTag(loadTag(reinterpret_cast<uptr>(Ptr)));
-      uptr TaggedBegin, TaggedEnd;
-      const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
-          reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
-          SizeClassMap::getSizeByClassId(NewHeader.ClassId));
-      // Exclude the previous tag so that immediate use after free is detected
-      // 100% of the time.
-      setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
-                   &TaggedEnd);
+      if (!TSDRegistry.getDisableMemInit()) {
+        uptr TaggedBegin, TaggedEnd;
+        const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe(
+            reinterpret_cast<uptr>(getBlockBegin(Ptr, &NewHeader)),
+            SizeClassMap::getSizeByClassId(NewHeader.ClassId));
+        // Exclude the previous tag so that immediate use after free is detected
+        // 100% of the time.
+        setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin,
+                     &TaggedEnd);
+      }
+      NewHeader.OriginOrWasZeroed = !TSDRegistry.getDisableMemInit();
       storeDeallocationStackMaybe(Ptr, PrevTag);
     }
     // If the quarantine is disabled, the actual size of a chunk is 0 or larger

diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
index b3bce6ee291a..662b733050bb 100644
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -185,6 +185,8 @@ struct BlockInfo {
 enum class Option : u8 {
   ReleaseInterval,      // Release to OS interval in milliseconds.
   MemtagTuning,         // Whether to tune tagging for UAF or overflow.
+  ThreadDisableMemInit, // Whether to disable automatic heap initialization and,
+                        // where possible, memory tagging, on this thread.
   MaxCacheEntriesCount, // Maximum number of blocks that can be cached.
   MaxCacheEntrySize,    // Maximum size of a block that can be cached.
   MaxTSDsCount,         // Number of usable TSDs for the shared registry.

diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
index 7e65b68ab36d..0736af1f2dc2 100644
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -121,6 +121,14 @@ size_t __scudo_get_region_info_size();
 #define M_MEMTAG_TUNING -102
 #endif
 
+// Per-thread memory initialization tuning. The value argument should be one of:
+// 1: Disable automatic heap initialization and, where possible, memory tagging,
+//    on this thread.
+// 0: Normal behavior.
+#ifndef M_THREAD_DISABLE_MEM_INIT
+#define M_THREAD_DISABLE_MEM_INIT -103
+#endif
+
 #ifndef M_CACHE_COUNT_MAX
 #define M_CACHE_COUNT_MAX -200
 #endif
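
As a caller-side usage sketch (assuming a platform such as Android where
Scudo provides the standard allocator entry points, so SCUDO_PREFIX(mallopt)
is plain mallopt, and assuming the Scudo interface header is on the include
path), a thread that is about to fully overwrite its allocations could opt
out of heap initialization temporarily:

    // Hypothetical caller-side example; not part of this commit.
    #include <malloc.h>
    #include <string.h>
    #include "scudo/interface.h"  // defines M_THREAD_DISABLE_MEM_INIT

    void fill_large_buffer(void) {
      // Skip zero/pattern-init (and, where possible, retagging on MTE)
      // for allocations made on this thread from here on.
      mallopt(M_THREAD_DISABLE_MEM_INIT, 1);
      char *Buf = (char *)malloc(1 << 20);
      if (Buf) {
        memset(Buf, 0xab, 1 << 20);  // the buffer is fully overwritten anyway
        free(Buf);
      }
      // Restore normal behavior for this thread.
      mallopt(M_THREAD_DISABLE_MEM_INIT, 0);
    }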

diff --git a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
index 13da70eff85b..6458e23e1423 100644
--- a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp
@@ -41,7 +41,7 @@ TEST(ScudoChunkTest, ChunkCmpXchg) {
   initChecksum();
   const scudo::uptr Size = 0x100U;
   scudo::Chunk::UnpackedHeader OldHeader = {};
-  OldHeader.Origin = scudo::Chunk::Origin::Malloc;
+  OldHeader.OriginOrWasZeroed = scudo::Chunk::Origin::Malloc;
   OldHeader.ClassId = 0x42U;
   OldHeader.SizeOrUnusedBytes = Size;
   OldHeader.State = scudo::Chunk::State::Allocated;

diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 481158308c43..9fe7e249f705 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -512,3 +512,44 @@ TEST(ScudoCombinedTest, OddEven) {
     EXPECT_TRUE(Found);
   }
 }
+
+TEST(ScudoCombinedTest, DisableMemInit) {
+  using AllocatorT = TestAllocator<scudo::AndroidConfig>;
+  using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap;
+  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+
+  std::vector<void *> Ptrs(65536, nullptr);
+
+  Allocator->setOption(scudo::Option::ThreadDisableMemInit, 1);
+
+  constexpr scudo::uptr MinAlignLog = FIRST_32_SECOND_64(3U, 4U);
+
+  // Test that if mem-init is disabled on a thread, calloc should still work as
+  // expected. This is tricky to ensure when MTE is enabled, so this test tries
+  // to exercise the relevant code on our MTE path.
+  for (scudo::uptr ClassId = 1U; ClassId <= 8; ClassId++) {
+    const scudo::uptr Size =
+        SizeClassMap::getSizeByClassId(ClassId) - scudo::Chunk::getHeaderSize();
+    if (Size < 8)
+      continue;
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size, Origin);
+      memset(Ptrs[I], 0xaa, Size);
+    }
+    for (unsigned I = 0; I != Ptrs.size(); ++I)
+      Allocator->deallocate(Ptrs[I], Origin, Size);
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size - 8, Origin);
+      memset(Ptrs[I], 0xbb, Size - 8);
+    }
+    for (unsigned I = 0; I != Ptrs.size(); ++I)
+      Allocator->deallocate(Ptrs[I], Origin, Size - 8);
+    for (unsigned I = 0; I != Ptrs.size(); ++I) {
+      Ptrs[I] = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true);
+      for (scudo::uptr J = 0; J < Size; ++J)
+        ASSERT_EQ((reinterpret_cast<char *>(Ptrs[I]))[J], 0);
+    }
+  }
+
+  Allocator->setOption(scudo::Option::ThreadDisableMemInit, 0);
+}

diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
index 9437167d8482..1704c8cf80d8 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -13,10 +13,13 @@
 
 namespace scudo {
 
-enum class ThreadState : u8 {
-  NotInitialized = 0,
-  Initialized,
-  TornDown,
+struct ThreadState {
+  bool DisableMemInit : 1;
+  enum {
+    NotInitialized = 0,
+    Initialized,
+    TornDown,
+  } InitState : 2;
 };
 
 template <class Allocator> void teardownThread(void *Ptr);
@@ -36,13 +39,13 @@ template <class Allocator> struct TSDRegistryExT {
   void unmapTestOnly() {}
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
-    if (LIKELY(State != ThreadState::NotInitialized))
+    if (LIKELY(State.InitState != ThreadState::NotInitialized))
       return;
     initThread(Instance, MinimalInit);
   }
 
   ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) {
-    if (LIKELY(State == ThreadState::Initialized &&
+    if (LIKELY(State.InitState == ThreadState::Initialized &&
                !atomic_load(&Disabled, memory_order_acquire))) {
       *UnlockRequired = false;
       return &ThreadTSD;
@@ -67,11 +70,15 @@ template <class Allocator> struct TSDRegistryExT {
   }
 
   bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::ThreadDisableMemInit)
+      State.DisableMemInit = Value;
     if (O == Option::MaxTSDsCount)
       return false;
     return true;
   }
 
+  bool getDisableMemInit() { return State.DisableMemInit; }
+
 private:
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
@@ -90,7 +97,7 @@ template <class Allocator> struct TSDRegistryExT {
     CHECK_EQ(
         pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
     ThreadTSD.initLinkerInitialized(Instance);
-    State = ThreadState::Initialized;
+    State.InitState = ThreadState::Initialized;
     Instance->callPostInitCallback();
   }
 
@@ -126,7 +133,7 @@ template <class Allocator> void teardownThread(void *Ptr) {
       return;
   }
   TSDRegistryT::ThreadTSD.commitBack(Instance);
-  TSDRegistryT::State = ThreadState::TornDown;
+  TSDRegistryT::State.InitState = ThreadState::TornDown;
 }
 
 } // namespace scudo

diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index 041b834c7485..2cacfde859b0 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -83,10 +83,14 @@ struct TSDRegistrySharedT {
   bool setOption(Option O, sptr Value) {
     if (O == Option::MaxTSDsCount)
       return setNumberOfTSDs(static_cast<u32>(Value));
+    if (O == Option::ThreadDisableMemInit)
+      setDisableMemInit(Value);
     // Not supported by the TSD Registry, but not an error either.
     return true;
   }
 
+  bool getDisableMemInit() const { return *getTlsPtr() & 1; }
+
 private:
   ALWAYS_INLINE uptr *getTlsPtr() const {
 #if SCUDO_HAS_PLATFORM_TLS_SLOT
@@ -97,12 +101,15 @@ struct TSDRegistrySharedT {
 #endif
   }
 
+  static_assert(alignof(TSD<Allocator>) >= 2, "");
+
   ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
-    *getTlsPtr() = reinterpret_cast<uptr>(CurrentTSD);
+    *getTlsPtr() &= 1;
+    *getTlsPtr() |= reinterpret_cast<uptr>(CurrentTSD);
   }
 
   ALWAYS_INLINE TSD<Allocator> *getCurrentTSD() {
-    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr());
+    return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
   }
 
   bool setNumberOfTSDs(u32 N) {
@@ -131,6 +138,11 @@ struct TSDRegistrySharedT {
     return true;
   }
 
+  void setDisableMemInit(bool B) {
+    *getTlsPtr() &= ~1ULL;
+    *getTlsPtr() |= B;
+  }
+
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))

diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index b25135b1ce1b..7386a0053a0f 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -179,6 +179,9 @@ INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, int value) {
     case M_MEMTAG_TUNING:
       option = scudo::Option::MemtagTuning;
       break;
+    case M_THREAD_DISABLE_MEM_INIT:
+      option = scudo::Option::ThreadDisableMemInit;
+      break;
     case M_CACHE_COUNT_MAX:
       option = scudo::Option::MaxCacheEntriesCount;
       break;


        

