[libc-commits] [libc] [libc][malloc] Reuse the prev_ field for allocated blocks (PR #101265)

via libc-commits libc-commits at lists.llvm.org
Tue Jul 30 16:58:53 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Daniel Thornburgh (mysterymath)

<details>
<summary>Changes</summary>

This applies a standard trick from Knuth for storing boundary tags with only one word of overhead for allocated blocks. The prev_ block is now only valid if the previous block is free.

This is safe, since only coalescing with a free node requires walking the blocks backwards. To allow determining whether it's safe to traverse backwards, the used flag is changed to a prev_free flag. Since it's still possible to unconditionally traverse forward, the prev_free flag for the next block can be used wherever the old used flag is, so long as there is always a next block.

To ensure there is always a next block, a sentinel last block is added at the end of the range of blocks. Due to the above, this costs only a single word per heap. This sentinel essentially just stores whether the last real block of the heap is free. The sentinel is always considered used and to have a zero inner size.

This completes the block optimizations needed to address #<!-- -->98086. The block structure should now be size-competitive with dlmalloc, although there are still a couple of broader fragmentation concerns to address.

---

Patch is 40.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101265.diff


5 Files Affected:

- (modified) libc/src/__support/block.h (+112-118) 
- (modified) libc/src/__support/freelist_heap.h (+7-33) 
- (modified) libc/test/src/__support/block_test.cpp (+86-113) 
- (modified) libc/test/src/__support/freelist_heap_test.cpp (+1-1) 
- (modified) libc/test/src/__support/freelist_malloc_test.cpp (+16-33) 


``````````diff
diff --git a/libc/src/__support/block.h b/libc/src/__support/block.h
index 6e7186f3224b9..ecea49926eda5 100644
--- a/libc/src/__support/block.h
+++ b/libc/src/__support/block.h
@@ -95,6 +95,26 @@ using cpp::optional;
 /// +----------+----------+--------------+
 /// @endcode
 ///
+/// As a space optimization, when a block is allocated, it consumes the prev
+/// field of the following block:
+///
+/// Block 1 (used):
+/// +---------------------+--------------+
+/// | Header              | Usable space |
+/// +----------+----------+--------------+
+/// | prev     | next     |              |
+/// | 0......3 | 4......7 | 8........230 |
+/// | 00000000 | 00000230 |  <app data>  |
+/// +----------+----------+--------------+
+/// Block 2:
+/// +---------------------+--------------+
+/// | B1       | Header   | Usable space |
+/// +----------+----------+--------------+
+/// |          | next     |              |
+/// | 0......3 | 4......7 | 8........827 |
+/// | xxxxxxxx | 00000830 | f7f7....f7f7 |
+/// +----------+----------+--------------+
+///
 /// The next offset of a block matches the previous offset of its next block.
 /// The first block in a list is denoted by having a previous offset of `0`.
 ///
@@ -110,9 +130,9 @@ using cpp::optional;
 template <typename OffsetType = uintptr_t, size_t kAlign = alignof(max_align_t)>
 class Block {
   // Masks for the contents of the next_ field.
-  static constexpr size_t USED_MASK = 1 << 0;
+  static constexpr size_t PREV_FREE_MASK = 1 << 0;
   static constexpr size_t LAST_MASK = 1 << 1;
-  static constexpr size_t SIZE_MASK = ~(USED_MASK | LAST_MASK);
+  static constexpr size_t SIZE_MASK = ~(PREV_FREE_MASK | LAST_MASK);
 
 public:
   using offset_type = OffsetType;
@@ -126,7 +146,8 @@ class Block {
   Block(const Block &other) = delete;
   Block &operator=(const Block &other) = delete;
 
-  /// Creates the first block for a given memory region.
+  /// Creates the first block for a given memory region, followed by a sentinel
+  /// last block. Returns the first block.
   static optional<Block *> init(ByteSpan region);
 
   /// @returns  A pointer to a `Block`, given a pointer to the start of the
@@ -149,7 +170,12 @@ class Block {
   size_t outer_size() const { return next_ & SIZE_MASK; }
 
   /// @returns The number of usable bytes inside the block.
-  size_t inner_size() const { return outer_size() - BLOCK_OVERHEAD; }
+  size_t inner_size() const {
+    if (!next())
+      return 0;
+    // The usable region includes the prev_ field of the next block.
+    return outer_size() - BLOCK_OVERHEAD + sizeof(prev_);
+  }
 
   /// @returns A pointer to the usable space inside this block.
   cpp::byte *usable_space() {
@@ -167,8 +193,9 @@ class Block {
   /// Attempts to split this block.
   ///
   /// If successful, the block will have an inner size of `new_inner_size`,
-  /// rounded up to a `ALIGNMENT` boundary. The remaining space will be
-  /// returned as a new block.
+  /// rounded to ensure that the split point is on an ALIGNMENT boundary. The
+  /// remaining space will be returned as a new block. Note that the prev_ field
+  /// of the next block counts as part of the inner size of the returnd block.
   ///
   /// This method may fail if the remaining space is too small to hold a new
   /// block. If this method fails for any reason, the original block is
@@ -182,40 +209,39 @@ class Block {
   /// is the last block.
   Block *next() const;
 
-  /// @returns The block immediately before this one, or a null pointer if this
-  /// is the first block.
-  Block *prev() const;
+  /// @returns The free block immediately before this one, otherwise nullptr.
+  Block *prev_free() const;
 
-  /// Indicates whether the block is in use.
-  ///
-  /// @returns `true` if the block is in use or `false` if not.
-  bool used() const { return next_ & USED_MASK; }
+  /// @returns Whether the block is unavailable for allocation.
+  bool used() const { return !next() || !next()->prev_free(); }
 
   /// Marks this block as in use.
-  void mark_used() { next_ |= USED_MASK; }
+  void mark_used() {
+    LIBC_ASSERT(next() && "last block is always considered used");
+    next()->next_ &= ~PREV_FREE_MASK;
+  }
 
   /// Marks this block as free.
-  void mark_free() { next_ &= ~USED_MASK; }
+  void mark_free() {
+    LIBC_ASSERT(next() && "last block is always considered used");
+    next()->next_ |= PREV_FREE_MASK;
+    // The next block's prev_ field becomes alive, as it is no longer part of
+    // this block's used space.
+    *new (&next()->prev_) offset_type = outer_size();
+  }
 
   /// Marks this block as the last one in the chain. Makes next() return
   /// nullptr.
-  constexpr void mark_last() { next_ |= LAST_MASK; }
+  void mark_last() { next_ |= LAST_MASK; }
 
-  /// @brief Checks if a block is valid.
-  ///
-  /// @returns `true` if and only if the following conditions are met:
-  /// * The block is aligned.
-  /// * The prev/next fields match with the previous and next blocks.
-  bool is_valid() const {
-    return check_status() == internal::BlockStatus::VALID;
-  }
-
-  constexpr Block(size_t prev_outer_size, size_t outer_size);
+  constexpr Block(size_t outer_size);
 
   bool is_usable_space_aligned(size_t alignment) const {
     return reinterpret_cast<uintptr_t>(usable_space()) % alignment == 0;
   }
 
+  /// @returns The new inner size of this block that would give the usable
+  /// space of the next block the given alignment.
   size_t padding_for_alignment(size_t alignment) const {
     if (is_usable_space_aligned(alignment))
       return 0;
@@ -235,9 +261,11 @@ class Block {
     //                            ^
     //                            Alignment requirement
     //
-    uintptr_t start = reinterpret_cast<uintptr_t>(usable_space());
     alignment = cpp::max(alignment, ALIGNMENT);
-    return align_up(start + BLOCK_OVERHEAD, alignment) - start;
+    uintptr_t start = reinterpret_cast<uintptr_t>(usable_space());
+    uintptr_t next_usable_space = align_up(start + BLOCK_OVERHEAD, alignment);
+    uintptr_t next_block = next_usable_space - BLOCK_OVERHEAD;
+    return next_block - start + sizeof(prev_);
   }
 
   // Check that we can `allocate` a block with a given alignment and size from
@@ -272,21 +300,16 @@ class Block {
 private:
   /// Construct a block to represent a span of bytes. Overwrites only enough
   /// memory for the block header; the rest of the span is left alone.
-  static Block *as_block(size_t prev_outer_size, ByteSpan bytes);
-
-  /// Returns a `BlockStatus` that is either VALID or indicates the reason why
-  /// the block is invalid.
-  ///
-  /// If the block is invalid at multiple points, this function will only return
-  /// one of the reasons.
-  internal::BlockStatus check_status() const;
+  static Block *as_block(ByteSpan bytes);
 
   /// Like `split`, but assumes the caller has already checked to parameters to
   /// ensure the split will succeed.
   Block *split_impl(size_t new_inner_size);
 
   /// Offset from this block to the previous block. 0 if this is the first
-  /// block.
+  /// block. This field is only alive when the previous block is free;
+  /// otherwise, its memory is reused as part of the previous block's usable
+  /// space.
   offset_type prev_ = 0;
 
   /// Offset from this block to the next block. Valid even if this is the last
@@ -296,14 +319,12 @@ class Block {
   /// Information about the current state of the block is stored in the two low
   /// order bits of the next_ value. These are guaranteed free by a minimum
   /// alignment (and thus, alignment of the size) of 4. The lowest bit is the
-  /// `used` flag, and the other bit is the `last` flag.
+  /// `prev_free` flag, and the other bit is the `last` flag.
   ///
-  /// * If the `used` flag is set, the block's usable memory has been allocated
-  ///   and is being used.
-  /// * If the `last` flag is set, the block does not have a next block.
-  /// * If the `used` flag is set, the alignment represents the requested value
-  ///   when the memory was allocated, which may be less strict than the actual
-  ///   alignment.
+  /// * If the `prev_free` flag is set, the block isn't the first and the
+  ///   previous block is free.
+  /// * If the `last` flag is set, the block is the sentinel last block. It is
+  ///   summarily considered used and has no next block.
 } __attribute__((packed, aligned(cpp::max(kAlign, size_t{4}))));
 
 // Public template method implementations.
@@ -332,29 +353,34 @@ Block<OffsetType, kAlign>::init(ByteSpan region) {
     return {};
 
   region = result.value();
-  if (region.size() < BLOCK_OVERHEAD)
+  if (region.size() < 2*BLOCK_OVERHEAD)
     return {};
 
   if (cpp::numeric_limits<OffsetType>::max() < region.size())
     return {};
 
-  Block *block = as_block(0, region);
-  block->mark_last();
+  Block *block = as_block(region.first(region.size() - BLOCK_OVERHEAD));
+  Block *last = as_block(region.last(BLOCK_OVERHEAD));
+  block->mark_free();
+  last->mark_last();
   return block;
 }
 
 template <typename OffsetType, size_t kAlign>
 bool Block<OffsetType, kAlign>::can_allocate(size_t alignment,
                                              size_t size) const {
-  if (is_usable_space_aligned(alignment) && inner_size() >= size)
-    return true; // Size and alignment constraints met.
-
-  // Either the alignment isn't met or we don't have enough size.
-  // If we don't meet alignment, we can always adjust such that we do meet the
-  // alignment. If we meet the alignment but just don't have enough size. This
-  // check will fail anyway.
-  size_t adjustment = padding_for_alignment(alignment);
-  return inner_size() >= size + adjustment;
+  if (inner_size() < size)
+    return false;
+  if (is_usable_space_aligned(alignment))
+    return true;
+
+  // Alignment isn't met, so a padding block is needed. Determine amount of
+  // inner_size() consumed by the padding block.
+  size_t padding_size = padding_for_alignment(alignment) - sizeof(prev_);
+
+  // Check that there is room for the allocation in the following aligned block.
+  size_t aligned_inner_size = inner_size() - padding_size - BLOCK_OVERHEAD;
+  return size <= aligned_inner_size;
 }
 
 template <typename OffsetType, size_t kAlign>
@@ -369,26 +395,19 @@ Block<OffsetType, kAlign>::allocate(Block *block, size_t alignment,
   BlockInfo info{block, /*prev=*/nullptr, /*next=*/nullptr};
 
   if (!info.block->is_usable_space_aligned(alignment)) {
-    size_t adjustment = info.block->padding_for_alignment(alignment);
-    LIBC_ASSERT((adjustment - BLOCK_OVERHEAD) % ALIGNMENT == 0 &&
-                "The adjustment calculation should always return a new size "
-                "that's a multiple of ALIGNMENT");
-
     Block *original = info.block;
     optional<Block *> maybe_aligned_block =
-        original->split(adjustment - BLOCK_OVERHEAD);
+        original->split(info.block->padding_for_alignment(alignment));
     LIBC_ASSERT(maybe_aligned_block.has_value() &&
                 "This split should always result in a new block. The check in "
                 "`can_allocate` ensures that we have enough space here to make "
                 "two blocks.");
 
-    if (Block *prev = original->prev()) {
-      // If there is a block before this, we can merge the current one with the
+    if (Block *prev = original->prev_free()) {
+      // If there is a free block before this, we can merge the current one with the
       // newly created one.
       prev->merge_next();
     } else {
-      // Otherwise, this was the very first block in the chain. Now we can make
-      // it the new first block.
       info.prev = original;
     }
 
@@ -410,9 +429,14 @@ optional<Block<OffsetType, kAlign> *>
 Block<OffsetType, kAlign>::split(size_t new_inner_size) {
   if (used())
     return {};
+  // The prev_ field of the next block is always available, so there is a minimum size to
+  // a block created through splitting.
+  if (new_inner_size < sizeof(prev_))
+    return {};
 
   size_t old_inner_size = inner_size();
-  new_inner_size = align_up(new_inner_size, ALIGNMENT);
+  new_inner_size = align_up(new_inner_size - sizeof(prev_), ALIGNMENT) +
+                   sizeof(prev_);
   if (old_inner_size < new_inner_size)
     return {};
 
@@ -425,41 +449,26 @@ Block<OffsetType, kAlign>::split(size_t new_inner_size) {
 template <typename OffsetType, size_t kAlign>
 Block<OffsetType, kAlign> *
 Block<OffsetType, kAlign>::split_impl(size_t new_inner_size) {
-  size_t outer_size1 = new_inner_size + BLOCK_OVERHEAD;
-  bool has_next = next();
+  size_t outer_size1 = new_inner_size - sizeof(prev_) + BLOCK_OVERHEAD;
+  LIBC_ASSERT(outer_size1 % ALIGNMENT == 0 && "new size must be aligned");
   ByteSpan new_region = region().subspan(outer_size1);
-  LIBC_ASSERT(!used() && "used blocks cannot be split");
-  // The low order bits of outer_size1 should both be zero, and is the correct
-  // value for the flags is false.
-  next_ = outer_size1;
-  LIBC_ASSERT(!used() && next() && "incorrect first split flags");
-  Block *new_block = as_block(outer_size1, new_region);
-
-  if (has_next) {
-    // The two flags are both false, so next_ is a plain size.
-    LIBC_ASSERT(!new_block->used() && next() && "flags disrupt use of size");
-    new_block->next()->prev_ = new_block->next_;
-  } else {
-    new_block->mark_last();
-  }
+  next_ &= ~SIZE_MASK;
+  next_ |= outer_size1;
+
+  Block *new_block = as_block(new_region);
+  mark_free(); // Free status for this block is now stored in new_block.
+  new_block->next()->prev_ = new_region.size();
   return new_block;
 }
 
 template <typename OffsetType, size_t kAlign>
 bool Block<OffsetType, kAlign>::merge_next() {
-  if (used() || !next() || next()->used())
+  if (used() || next()->used())
     return false;
-
-  // Extend the size and copy the last() flag from the next block to this one.
-  next_ &= SIZE_MASK;
-  next_ += next()->next_;
-
-  if (next()) {
-    // The two flags are both false, so next_ is a plain size.
-    LIBC_ASSERT(!used() && next() && "flags disrupt use of size");
-    next()->prev_ = next_;
-  }
-
+  size_t new_size = outer_size() + next()->outer_size();
+  next_ &= ~SIZE_MASK;
+  next_ |= new_size;
+  next()->prev_ = new_size;
   return true;
 }
 
@@ -472,39 +481,24 @@ Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::next() const {
 }
 
 template <typename OffsetType, size_t kAlign>
-Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::prev() const {
-  uintptr_t addr = (prev_ == 0) ? 0 : reinterpret_cast<uintptr_t>(this) - prev_;
-  return reinterpret_cast<Block *>(addr);
+Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::prev_free() const {
+  if (!(next_ & PREV_FREE_MASK))
+    return nullptr;
+  return reinterpret_cast<Block *>(reinterpret_cast<uintptr_t>(this) - prev_);
 }
 
 // Private template method implementations.
 
 template <typename OffsetType, size_t kAlign>
-constexpr Block<OffsetType, kAlign>::Block(size_t prev_outer_size,
-                                           size_t outer_size) {
-  prev_ = prev_outer_size;
+constexpr Block<OffsetType, kAlign>::Block(size_t outer_size)
+    : next_(outer_size) {
   LIBC_ASSERT(outer_size % ALIGNMENT == 0 && "block sizes must be aligned");
-  next_ = outer_size;
 }
 
 template <typename OffsetType, size_t kAlign>
 Block<OffsetType, kAlign> *
-Block<OffsetType, kAlign>::as_block(size_t prev_outer_size, ByteSpan bytes) {
-  return ::new (bytes.data()) Block(prev_outer_size, bytes.size());
-}
-
-template <typename OffsetType, size_t kAlign>
-internal::BlockStatus Block<OffsetType, kAlign>::check_status() const {
-  if (reinterpret_cast<uintptr_t>(this) % ALIGNMENT != 0)
-    return internal::BlockStatus::MISALIGNED;
-
-  if (next() && (this >= next() || this != next()->prev()))
-    return internal::BlockStatus::NEXT_MISMATCHED;
-
-  if (prev() && (this <= prev() || this != prev()->next()))
-    return internal::BlockStatus::PREV_MISMATCHED;
-
-  return internal::BlockStatus::VALID;
+Block<OffsetType, kAlign>::as_block(ByteSpan bytes) {
+  return ::new (bytes.data()) Block(bytes.size());
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/freelist_heap.h b/libc/src/__support/freelist_heap.h
index fed00d06716cf..6c860d039553a 100644
--- a/libc/src/__support/freelist_heap.h
+++ b/libc/src/__support/freelist_heap.h
@@ -41,15 +41,6 @@ template <size_t NUM_BUCKETS = DEFAULT_BUCKETS.size()> class FreeListHeap {
   static constexpr size_t MIN_ALIGNMENT =
       cpp::max(BlockType::ALIGNMENT, alignof(max_align_t));
 
-  struct HeapStats {
-    size_t total_bytes;
-    size_t bytes_allocated;
-    size_t cumulative_allocated;
-    size_t cumulative_freed;
-    size_t total_allocate_calls;
-    size_t total_free_calls;
-  };
-
   constexpr FreeListHeap() : begin_(&_end), end_(&__llvm_libc_heap_limit) {}
 
   constexpr FreeListHeap(span<cpp::byte> region)
@@ -63,8 +54,6 @@ template <size_t NUM_BUCKETS = DEFAULT_BUCKETS.size()> class FreeListHeap {
   void *realloc(void *ptr, size_t size);
   void *calloc(size_t num, size_t size);
 
-  const HeapStats &heap_stats() const { return heap_stats_; }
-
   cpp::span<cpp::byte> region() const { return {begin_, end_}; }
 
 private:
@@ -82,7 +71,6 @@ template <size_t NUM_BUCKETS = DEFAULT_BUCKETS.size()> class FreeListHeap {
   cpp::byte *begin_;
   cpp::byte *end_;
   FreeListType freelist_{DEFAULT_BUCKETS};
-  HeapStats heap_stats_{};
 };
 
 template <size_t BUFF_SIZE, size_t NUM_BUCKETS = DEFAULT_BUCKETS.size()>
@@ -100,7 +88,6 @@ class FreeListHeapBuffer : public FreeListHeap<NUM_BUCKETS> {
 
 template <size_t NUM_BUCKETS> void FreeListHeap<NUM_BUCKETS>::init() {
   LIBC_ASSERT(!is_initialized_ && "duplicate initialization");
-  heap_stats_.total_bytes = region().size();
   auto result = BlockType::init(region());
   BlockType *block = *result;
   freelist_.add_chunk(block_to_span(block));
@@ -139,10 +126,6 @@ void *FreeListHeap<NUM_BUCKETS>::allocate_impl(size_t alignment, size_t size) {
 
   chunk_block->mark_used();
 
-  heap_stats_.bytes_allocated += size;
-  heap_stats_.cumulative_allocated += size;
-  heap_stats_.total_allocate_calls += 1;
-
   return chunk_block->usable_space();
 }
 
@@ -171,35 +154,26 @@ template <size_t NUM_BUCKETS> void FreeListHeap<NUM_BUCKETS>::free(void *ptr) {
   LIBC_ASSERT(is_valid_ptr(bytes) && "Invalid pointer");
 
   BlockType *chunk_block = BlockType::from_usable_space(bytes);
-
-  size_t size_freed = chunk_block->inner_size();
+  LIBC_ASSERT(chunk_block->next() && "sentinel last block cannot be freed");
   LIBC_ASSERT(chunk_block->used() && "The block is not in-use");
   chunk_block->mark_free();
 
   // Can we combine with the left or right blocks?
-  BlockType *prev = chunk_block->prev();
-  BlockType *next = nullptr;
+  BlockType *prev_free = chunk_block->prev_free();
+  BlockType *next = chunk_block->next();
 
-  if (chunk_block->next())
-    next = chunk_block->next();
-
-  if (prev != nullptr && !prev->used()) {
+  if (prev_free != nullptr) {
     // Remove from freelist and merge
-    freelist_.remove_chunk(block_to_span(prev));
-    chunk_block = chunk_block->prev();
+    freelist_.remove_chunk(block_to_span(prev_free));
+    chunk_block = prev_free;
     chunk_block->merge_next();
   }
-
-  if (next != nullptr && !next->used()) {
+  if (!next->used()) {
     freelist_.remove_chunk(block_to_span(next));
     chunk_block->merge_next();
   }
   // Add back to the freelist
   freelist_.add_chunk(block_to_span(chunk_block));
-
-  heap_stats_.bytes_allocated -= size_freed;
-  heap_stats_.cumulative_freed += size_freed;
-  heap_stats_.total_free_calls += 1;
 }
 
 // Follows constract of the C standard realloc() function
diff --git a/libc/test/src/__support/block_test.cpp b/libc/test/src/__support/block_test.cpp
index ecce00b7926f9..a6625b58f0094 100644
--- a/libc/test/src/__support/block_test.cpp
+++ b/libc/test/src/__support/block_test.cpp
@@ -49,10 +49,20 @@ TEST_FOR_EACH_BLOCK_TYPE(CanCreateSingleAlignedBlock) {
   ASSERT_TRUE(result.has_value());
   BlockType *block = *result;
 
-  EXPECT_EQ(block->outer_size(), kN);
-  EXPECT_EQ(block->inner_size(), kN - BlockType::BLOCK_OVERHEAD);
-  EXPECT_EQ(block->prev(), static_cast<BlockType *>(nullptr));
-  EXPECT_EQ(block->next(), static_cast<BlockType *>(nullptr));
+  BlockType *last = block->next();
+  ASSERT_NE(last, static_cast<BlockType *>(nullptr));
+  constexpr size_t last_outer_size = BlockType::BLOCK_OVERHEAD;
+  E...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/101265


More information about the libc-commits mailing list