[compiler-rt] r319791 - [scudo] Get rid of the thread local PRNG & header salt

Kostya Kortchinsky via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 09:08:30 PST 2017


Author: cryptoad
Date: Tue Dec  5 09:08:29 2017
New Revision: 319791

URL: http://llvm.org/viewvc/llvm-project?rev=319791&view=rev
Log:
[scudo] Get rid of the thread local PRNG & header salt

Summary:
It was deemed that the salt in the chunk header didn't improve security
significantly (and could actually decrease it). The initial idea was that the
same chunk would have different headers on different allocations, making them
less predictable. The issue is that gathering headers for the same chunk with
different salts can leak information about the other "secrets" (cookie,
pointer), and that if an attacker leaks a header, they can reuse it for that
same chunk anyway since we don't enforce the salt value.

So we get rid of the salt in the header. This means we also get rid of the
thread local Prng, and we no longer need a global Prng either. This makes
everything faster.
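
For illustration, a minimal standalone sketch of the new seeding path,
assuming Linux's getrandom(2) stands in for sanitizer_common's GetRandom and
using a hypothetical initCookie helper; the fallback mixes a timestamp with an
address, mirroring the non-blocking fallback in init() in the diff below:

  #include <cstdint>
  #include <time.h>
  #include <unistd.h>
  #include <sys/random.h>

  static uint32_t Cookie;

  // Seed the global cookie from the OS entropy pool; fall back to a weak
  // time/address mix for early processes whose pool isn't initialized yet.
  static void initCookie(const void *Self) {
    if (getrandom(&Cookie, sizeof(Cookie), GRND_NONBLOCK) !=
        static_cast<ssize_t>(sizeof(Cookie))) {
      struct timespec TS;
      clock_gettime(CLOCK_MONOTONIC, &TS);
      Cookie = static_cast<uint32_t>(
          (static_cast<uint64_t>(TS.tv_nsec) >> 12) ^
          (reinterpret_cast<uintptr_t>(Self) >> 4));
    }
  }

  int main() {
    initCookie(&Cookie);
    return 0;
  }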

We reuse those 8 bits to store the `ClassId` of a chunk now (0 for a Secondary
backed allocation). This way, we get some additional speed gains:
- `ClassId` is computed outside of the locked block;
- `getActuallyAllocatedSize` doesn't need the `GetSizeClass` call;
- same for `deallocatePrimary`.
We add a sanity check at init for this new field (all sanity checks are moved
into their own function, as `init` was getting crowded).
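
For reference, a minimal standalone model of the new header layout (field
widths copied from the UnpackedHeader diff in scudo_allocator.h below;
HeaderModel is a hypothetical name); the ClassId round-trip mirrors the new
check in performSanityChecks():

  #include <cassert>
  #include <cstdint>

  // All six fields add up to exactly 64 bits, so the header still packs
  // into a single u64 (the atomically updated PackedHeader).
  struct HeaderModel {
    uint64_t Checksum          : 16;
    uint64_t ClassId           : 8;   // 0 means Secondary backed.
    uint64_t SizeOrUnusedBytes : 20;
    uint64_t State             : 2;
    uint64_t AllocType         : 2;
    uint64_t Offset            : 16;
  };
  static_assert(sizeof(HeaderModel) == sizeof(uint64_t),
                "the header must pack into a single u64");

  int main() {
    HeaderModel H = {};
    H.ClassId = 255;           // largest value an 8-bit field can hold
    assert(H.ClassId == 255);  // the round-trip check done at init
    return 0;
  }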

Reviewers: alekseyshl, flowerhack

Reviewed By: alekseyshl

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D40796

Modified:
    compiler-rt/trunk/lib/scudo/scudo_allocator.cpp
    compiler-rt/trunk/lib/scudo/scudo_allocator.h
    compiler-rt/trunk/lib/scudo/scudo_allocator_combined.h
    compiler-rt/trunk/lib/scudo/scudo_tsd.h
    compiler-rt/trunk/lib/scudo/scudo_utils.h

Modified: compiler-rt/trunk/lib/scudo/scudo_allocator.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/scudo/scudo_allocator.cpp?rev=319791&r1=319790&r2=319791&view=diff
==============================================================================
--- compiler-rt/trunk/lib/scudo/scudo_allocator.cpp (original)
+++ compiler-rt/trunk/lib/scudo/scudo_allocator.cpp Tue Dec  5 09:08:29 2017
@@ -30,7 +30,7 @@
 namespace __scudo {
 
 // Global static cookie, initialized at start-up.
-static uptr Cookie;
+static u32 Cookie;
 
 // We default to software CRC32 if the alternatives are not supported, either
 // at compilation or at runtime.
@@ -66,7 +66,7 @@ struct ScudoChunk : UnpackedHeader {
   // We can't use the offset member of the chunk itself, as we would double
   // fetch it without any warranty that it wouldn't have been tampered. To
   // prevent this, we work with a local copy of the header.
-  void *getAllocBeg(UnpackedHeader *Header) {
+  void *getBackendPtr(UnpackedHeader *Header) {
     return reinterpret_cast<void *>(
         reinterpret_cast<uptr>(this) - (Header->Offset << MinAlignmentLog));
   }
@@ -74,9 +74,9 @@ struct ScudoChunk : UnpackedHeader {
   // Returns the usable size for a chunk, meaning the amount of bytes from the
   // beginning of the user data to the end of the backend allocated chunk.
   uptr getUsableSize(UnpackedHeader *Header) {
-    uptr Size =
-        getBackendAllocator().getActuallyAllocatedSize(getAllocBeg(Header),
-                                                       Header->FromPrimary);
+    const uptr Size =
+        getBackendAllocator().getActuallyAllocatedSize(getBackendPtr(Header),
+                                                       Header->ClassId);
     if (Size == 0)
       return 0;
     return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog);
@@ -88,8 +88,7 @@ struct ScudoChunk : UnpackedHeader {
     ZeroChecksumHeader.Checksum = 0;
     uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
     memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder));
-    u32 Crc = computeCRC32(static_cast<u32>(Cookie),
-                           reinterpret_cast<uptr>(this), HeaderHolder,
+    u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HeaderHolder,
                            ARRAY_SIZE(HeaderHolder));
     return static_cast<u16>(Crc);
   }
@@ -176,9 +175,9 @@ struct QuarantineCallback {
                      Chunk);
     }
     Chunk->eraseHeader();
-    void *Ptr = Chunk->getAllocBeg(&Header);
-    if (Header.FromPrimary)
-      getBackendAllocator().deallocatePrimary(Cache_, Ptr);
+    void *Ptr = Chunk->getBackendPtr(&Header);
+    if (Header.ClassId)
+      getBackendAllocator().deallocatePrimary(Cache_, Ptr, Header.ClassId);
     else
       getBackendAllocator().deallocateSecondary(Ptr);
   }
@@ -186,16 +185,17 @@ struct QuarantineCallback {
   // Internal quarantine allocation and deallocation functions. We first check
   // that the batches are indeed serviced by the Primary.
   // TODO(kostyak): figure out the best way to protect the batches.
-  COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
   void *Allocate(uptr Size) {
-    return getBackendAllocator().allocatePrimary(Cache_, Size);
+    return getBackendAllocator().allocatePrimary(Cache_, BatchClassId);
   }
 
   void Deallocate(void *Ptr) {
-    getBackendAllocator().deallocatePrimary(Cache_, Ptr);
+    getBackendAllocator().deallocatePrimary(Cache_, Ptr, BatchClassId);
   }
 
   AllocatorCache *Cache_;
+  COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
+  const uptr BatchClassId = SizeClassMap::ClassID(sizeof(QuarantineBatch));
 };
 
 typedef Quarantine<QuarantineCallback, ScudoChunk> ScudoQuarantine;
@@ -217,9 +217,6 @@ struct ScudoAllocator {
   ScudoBackendAllocator BackendAllocator;
   ScudoQuarantine AllocatorQuarantine;
 
-  StaticSpinMutex GlobalPrngMutex;
-  ScudoPrng GlobalPrng;
-
   u32 QuarantineChunksUpToSize;
 
   bool DeallocationTypeMismatch;
@@ -235,10 +232,7 @@ struct ScudoAllocator {
   explicit ScudoAllocator(LinkerInitialized)
     : AllocatorQuarantine(LINKER_INITIALIZED) {}
 
-  void init() {
-    SanitizerToolName = "Scudo";
-    initFlags();
-
+  void performSanityChecks() {
     // Verify that the header offset field can hold the maximum offset. In the
     // case of the Secondary allocator, it takes care of alignment and the
     // offset will always be 0. In the case of the Primary, the worst case
@@ -248,9 +242,9 @@ struct ScudoAllocator {
     // result, the maximum offset will be at most the maximum alignment for the
     // last size class minus the header size, in multiples of MinAlignment.
     UnpackedHeader Header = {};
-    uptr MaxPrimaryAlignment =
+    const uptr MaxPrimaryAlignment =
         1 << MostSignificantSetBitIndex(SizeClassMap::kMaxSize - MinAlignment);
-    uptr MaxOffset =
+    const uptr MaxOffset =
         (MaxPrimaryAlignment - AlignedChunkHeaderSize) >> MinAlignmentLog;
     Header.Offset = MaxOffset;
     if (Header.Offset != MaxOffset) {
@@ -262,13 +256,26 @@ struct ScudoAllocator {
     // case scenario happens in the Primary. It will depend on the second to
     // last and last class sizes, as well as the dynamic base for the Primary.
     // The following is an over-approximation that works for our needs.
-    uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1;
+    const uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1;
     Header.SizeOrUnusedBytes = MaxSizeOrUnusedBytes;
     if (Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes) {
       dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in "
                      "the header\n");
     }
 
+    const uptr LargestClassId = SizeClassMap::kLargestClassID;
+    Header.ClassId = LargestClassId;
+    if (Header.ClassId != LargestClassId) {
+      dieWithMessage("ERROR: the largest class ID doesn't fit in the header\n");
+    }
+  }
+
+  void init() {
+    SanitizerToolName = "Scudo";
+    initFlags();
+
+    performSanityChecks();
+
     // Check if hardware CRC32 is supported in the binary and by the platform,
     // if so, opt for the CRC32 hardware version of the checksum.
     if (&computeHardwareCRC32 && hasHardwareCRC32())
@@ -286,8 +293,11 @@ struct ScudoAllocator {
     DeleteSizeMismatch = getFlags()->DeleteSizeMismatch;
     ZeroContents = getFlags()->ZeroContents;
 
-    GlobalPrng.init();
-    Cookie = GlobalPrng.getU64();
+    if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(&Cookie), sizeof(Cookie),
+                            /*blocking=*/false))) {
+      Cookie = static_cast<u32>((NanoTime() >> 12) ^
+                                (reinterpret_cast<uptr>(this) >> 4));
+    }
 
     CheckRssLimit = HardRssLimitMb || SoftRssLimitMb;
     if (CheckRssLimit)
@@ -365,23 +375,21 @@ struct ScudoAllocator {
     // Primary and Secondary backed allocations have a different treatment. We
     // deal with alignment requirements of Primary serviced allocations here,
     // but the Secondary will take care of its own alignment needs.
-    bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
+    const bool FromPrimary =
+        PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
 
     void *Ptr;
-    u8 Salt;
+    u8 ClassId;
     uptr AllocSize;
     if (FromPrimary) {
       AllocSize = AlignedSize;
+      ClassId = SizeClassMap::ClassID(AllocSize);
       ScudoTSD *TSD = getTSDAndLock();
-      Salt = TSD->Prng.getU8();
-      Ptr = BackendAllocator.allocatePrimary(&TSD->Cache, AllocSize);
+      Ptr = BackendAllocator.allocatePrimary(&TSD->Cache, ClassId);
       TSD->unlock();
     } else {
-      {
-        SpinMutexLock l(&GlobalPrngMutex);
-        Salt = GlobalPrng.getU8();
-      }
       AllocSize = NeededSize;
+      ClassId = 0;
       Ptr = BackendAllocator.allocateSecondary(AllocSize, Alignment);
     }
     if (UNLIKELY(!Ptr))
@@ -389,26 +397,25 @@ struct ScudoAllocator {
 
     // If requested, we will zero out the entire contents of the returned chunk.
     if ((ForceZeroContents || ZeroContents) && FromPrimary)
-      memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize(
-          Ptr, /*FromPrimary=*/true));
+      memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize(Ptr, ClassId));
 
     UnpackedHeader Header = {};
-    uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
-    uptr UserBeg = AllocBeg + AlignedChunkHeaderSize;
+    uptr BackendPtr = reinterpret_cast<uptr>(Ptr);
+    uptr UserBeg = BackendPtr + AlignedChunkHeaderSize;
     if (UNLIKELY(!IsAligned(UserBeg, Alignment))) {
       // Since the Secondary takes care of alignment, a non-aligned pointer
       // means it is from the Primary. It is also the only case where the offset
       // field of the header would be non-zero.
       CHECK(FromPrimary);
       UserBeg = RoundUpTo(UserBeg, Alignment);
-      uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg;
+      uptr Offset = UserBeg - AlignedChunkHeaderSize - BackendPtr;
       Header.Offset = Offset >> MinAlignmentLog;
     }
-    CHECK_LE(UserBeg + Size, AllocBeg + AllocSize);
+    CHECK_LE(UserBeg + Size, BackendPtr + AllocSize);
+    Header.ClassId = ClassId;
     Header.State = ChunkAllocated;
     Header.AllocType = Type;
     if (FromPrimary) {
-      Header.FromPrimary = 1;
       Header.SizeOrUnusedBytes = Size;
     } else {
       // The secondary fits the allocations to a page, so the amount of unused
@@ -419,7 +426,6 @@ struct ScudoAllocator {
       if (TrailingBytes)
         Header.SizeOrUnusedBytes = PageSize - TrailingBytes;
     }
-    Header.Salt = Salt;
     getScudoChunk(UserBeg)->storeHeader(&Header);
     void *UserPtr = reinterpret_cast<void *>(UserBeg);
     // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size);
@@ -435,10 +441,11 @@ struct ScudoAllocator {
         (Size > QuarantineChunksUpToSize);
     if (BypassQuarantine) {
       Chunk->eraseHeader();
-      void *Ptr = Chunk->getAllocBeg(Header);
-      if (Header->FromPrimary) {
+      void *Ptr = Chunk->getBackendPtr(Header);
+      if (Header->ClassId) {
         ScudoTSD *TSD = getTSDAndLock();
-        getBackendAllocator().deallocatePrimary(&TSD->Cache, Ptr);
+        getBackendAllocator().deallocatePrimary(&TSD->Cache, Ptr,
+                                                Header->ClassId);
         TSD->unlock();
       } else {
         getBackendAllocator().deallocateSecondary(Ptr);
@@ -496,7 +503,7 @@ struct ScudoAllocator {
         }
       }
     }
-    uptr Size = Header.FromPrimary ? Header.SizeOrUnusedBytes :
+    uptr Size = Header.ClassId ? Header.SizeOrUnusedBytes :
         Chunk->getUsableSize(&Header) - Header.SizeOrUnusedBytes;
     if (DeleteSizeMismatch) {
       if (DeleteSize && DeleteSize != Size) {
@@ -536,7 +543,7 @@ struct ScudoAllocator {
         (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) {
       UnpackedHeader NewHeader = OldHeader;
       NewHeader.SizeOrUnusedBytes =
-                OldHeader.FromPrimary ? NewSize : UsableSize - NewSize;
+          OldHeader.ClassId ? NewSize : UsableSize - NewSize;
       Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
       return OldPtr;
     }
@@ -544,7 +551,7 @@ struct ScudoAllocator {
     // old one.
     void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc);
     if (NewPtr) {
-      uptr OldSize = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes :
+      uptr OldSize = OldHeader.ClassId ? OldHeader.SizeOrUnusedBytes :
           UsableSize - OldHeader.SizeOrUnusedBytes;
       memcpy(NewPtr, OldPtr, Min(NewSize, UsableSize));
       quarantineOrDeallocateChunk(Chunk, &OldHeader, OldSize);
@@ -608,7 +615,6 @@ void initScudo() {
 void ScudoTSD::init(bool Shared) {
   UnlockRequired = Shared;
   getBackendAllocator().initCache(&Cache);
-  Prng.init();
   memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
 }
 

Modified: compiler-rt/trunk/lib/scudo/scudo_allocator.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/scudo/scudo_allocator.h?rev=319791&r1=319790&r2=319791&view=diff
==============================================================================
--- compiler-rt/trunk/lib/scudo/scudo_allocator.h (original)
+++ compiler-rt/trunk/lib/scudo/scudo_allocator.h Tue Dec  5 09:08:29 2017
@@ -39,16 +39,15 @@ enum ChunkState : u8 {
 typedef u64 PackedHeader;
 struct UnpackedHeader {
   u64 Checksum          : 16;
-  u64 SizeOrUnusedBytes : 19;  // Size for Primary backed allocations, amount of
+  u64 ClassId           : 8;
+  u64 SizeOrUnusedBytes : 20;  // Size for Primary backed allocations, amount of
                                // unused bytes in the chunk for Secondary ones.
-  u64 FromPrimary       : 1;
   u64 State             : 2;   // available, allocated, or quarantined
   u64 AllocType         : 2;   // malloc, new, new[], or memalign
   u64 Offset            : 16;  // Offset from the beginning of the backend
                                // allocation to the beginning of the chunk
                                // itself, in multiples of MinAlignment. See
                                // comment about its maximum value and in init().
-  u64 Salt              : 8;
 };
 
 typedef atomic_uint64_t AtomicPackedHeader;

Modified: compiler-rt/trunk/lib/scudo/scudo_allocator_combined.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/scudo/scudo_allocator_combined.h?rev=319791&r1=319790&r2=319791&view=diff
==============================================================================
--- compiler-rt/trunk/lib/scudo/scudo_allocator_combined.h (original)
+++ compiler-rt/trunk/lib/scudo/scudo_allocator_combined.h Tue Dec  5 09:08:29 2017
@@ -31,8 +31,8 @@ class ScudoCombinedAllocator {
 
   // Primary allocations are always MinAlignment aligned, and as such do not
   // require an Alignment parameter.
-  void *allocatePrimary(AllocatorCache *Cache, uptr Size) {
-    return Cache->Allocate(&Primary, Primary.ClassID(Size));
+  void *allocatePrimary(AllocatorCache *Cache, uptr ClassId) {
+    return Cache->Allocate(&Primary, ClassId);
   }
 
   // Secondary allocations do not require a Cache, but do require an Alignment
@@ -41,17 +41,17 @@ class ScudoCombinedAllocator {
     return Secondary.Allocate(&Stats, Size, Alignment);
   }
 
-  void deallocatePrimary(AllocatorCache *Cache, void *Ptr) {
-    Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr);
+  void deallocatePrimary(AllocatorCache *Cache, void *Ptr, uptr ClassId) {
+    Cache->Deallocate(&Primary, ClassId, Ptr);
   }
 
   void deallocateSecondary(void *Ptr) {
     Secondary.Deallocate(&Stats, Ptr);
   }
 
-  uptr getActuallyAllocatedSize(void *Ptr, bool FromPrimary) {
-    if (FromPrimary)
-      return PrimaryAllocator::ClassIdToSize(Primary.GetSizeClass(Ptr));
+  uptr getActuallyAllocatedSize(void *Ptr, uptr ClassId) {
+    if (ClassId)
+      return PrimaryAllocator::ClassIdToSize(ClassId);
     return Secondary.GetActuallyAllocatedSize(Ptr);
   }
 

Modified: compiler-rt/trunk/lib/scudo/scudo_tsd.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/scudo/scudo_tsd.h?rev=319791&r1=319790&r2=319791&view=diff
==============================================================================
--- compiler-rt/trunk/lib/scudo/scudo_tsd.h (original)
+++ compiler-rt/trunk/lib/scudo/scudo_tsd.h Tue Dec  5 09:08:29 2017
@@ -25,7 +25,6 @@ namespace __scudo {
 
 struct ALIGNED(64) ScudoTSD {
   AllocatorCache Cache;
-  ScudoPrng Prng;
   uptr QuarantineCachePlaceHolder[4];
 
   void init(bool Shared);

Modified: compiler-rt/trunk/lib/scudo/scudo_utils.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/scudo/scudo_utils.h?rev=319791&r1=319790&r2=319791&view=diff
==============================================================================
--- compiler-rt/trunk/lib/scudo/scudo_utils.h (original)
+++ compiler-rt/trunk/lib/scudo/scudo_utils.h Tue Dec  5 09:08:29 2017
@@ -31,62 +31,6 @@ INLINE Dest bit_cast(const Source& sourc
 void NORETURN dieWithMessage(const char *Format, ...);
 
 bool hasHardwareCRC32();
-
-INLINE u64 rotl(const u64 X, int K) {
-  return (X << K) | (X >> (64 - K));
-}
-
-// XoRoShiRo128+ PRNG (http://xoroshiro.di.unimi.it/).
-struct XoRoShiRo128Plus {
- public:
-  void init() {
-    if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(State), sizeof(State),
-                            /*blocking=*/false))) {
-      // On some platforms, early processes like `init` do not have an
-      // initialized random pool (getrandom blocks and /dev/urandom doesn't
-      // exist yet), but we still have to provide them with some degree of
-      // entropy. Not having a secure seed is not as problematic for them, as
-      // they are less likely to be the target of heap based vulnerabilities
-      // exploitation attempts.
-      State[0] = NanoTime();
-      State[1] = 0;
-    }
-    fillCache();
-  }
-  u8 getU8() {
-    if (UNLIKELY(isCacheEmpty()))
-      fillCache();
-    const u8 Result = static_cast<u8>(CachedBytes & 0xff);
-    CachedBytes >>= 8;
-    CachedBytesAvailable--;
-    return Result;
-  }
-  u64 getU64() { return next(); }
-
- private:
-  u8 CachedBytesAvailable;
-  u64 CachedBytes;
-  u64 State[2];
-  u64 next() {
-    const u64 S0 = State[0];
-    u64 S1 = State[1];
-    const u64 Result = S0 + S1;
-    S1 ^= S0;
-    State[0] = rotl(S0, 55) ^ S1 ^ (S1 << 14);
-    State[1] = rotl(S1, 36);
-    return Result;
-  }
-  bool isCacheEmpty() {
-    return CachedBytesAvailable == 0;
-  }
-  void fillCache() {
-    CachedBytes = next();
-    CachedBytesAvailable = sizeof(CachedBytes);
-  }
-};
-
-typedef XoRoShiRo128Plus ScudoPrng;
-
 }  // namespace __scudo
 
 #endif  // SCUDO_UTILS_H_
