[llvm] r231125 - [ADT] fail-fast iterators for DenseMap

Sanjoy Das sanjoy at playingwithpointers.com
Tue Mar 3 12:46:45 PST 2015


Author: sanjoy
Date: Tue Mar  3 14:46:45 2015
New Revision: 231125

URL: http://llvm.org/viewvc/llvm-project?rev=231125&view=rev
Log:
[ADT] fail-fast iterators for DenseMap

This patch was landed in r231035 and reverted because it was buggy.
This is fixed version of the same change.

Summary:
This patch is an attempt at making `DenseMapIterator`s "fail-fast".
Fail-fast iterators that have been invalidated due to insertion into
the host `DenseMap` deterministically trip an assert (in debug mode)
on access, instead of non-deterministically hitting memory corruption
issues.

Reviewers: dexonsmith, dberlin, ruiu, chandlerc

Reviewed By: chandlerc

Subscribers: yaron.keren, chandlerc, llvm-commits

Differential Revision: http://reviews.llvm.org/D7931

Added:
    llvm/trunk/include/llvm/ADT/EpochTracker.h
Modified:
    llvm/trunk/include/llvm/ADT/DenseMap.h

Modified: llvm/trunk/include/llvm/ADT/DenseMap.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/DenseMap.h?rev=231125&r1=231124&r2=231125&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/DenseMap.h (original)
+++ llvm/trunk/include/llvm/ADT/DenseMap.h Tue Mar  3 14:46:45 2015
@@ -15,6 +15,7 @@
 #define LLVM_ADT_DENSEMAP_H
 
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/EpochTracker.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
@@ -50,7 +51,7 @@ class DenseMapIterator;
 
 template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
           typename BucketT>
-class DenseMapBase {
+class DenseMapBase : public DebugEpochBase {
 public:
   typedef unsigned size_type;
   typedef KeyT key_type;
@@ -62,16 +63,17 @@ public:
       const_iterator;
   inline iterator begin() {
     // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets().
-    return empty() ? end() : iterator(getBuckets(), getBucketsEnd());
+    return empty() ? end() : iterator(getBuckets(), getBucketsEnd(), *this);
   }
   inline iterator end() {
-    return iterator(getBucketsEnd(), getBucketsEnd(), true);
+    return iterator(getBucketsEnd(), getBucketsEnd(), *this, true);
   }
   inline const_iterator begin() const {
-    return empty() ? end() : const_iterator(getBuckets(), getBucketsEnd());
+    return empty() ? end()
+                   : const_iterator(getBuckets(), getBucketsEnd(), *this);
   }
   inline const_iterator end() const {
-    return const_iterator(getBucketsEnd(), getBucketsEnd(), true);
+    return const_iterator(getBucketsEnd(), getBucketsEnd(), *this, true);
   }
 
   bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const {
@@ -81,11 +83,13 @@ public:
 
   /// Grow the densemap so that it has at least Size buckets. Does not shrink
   void resize(size_type Size) {
+    incrementEpoch();
     if (Size > getNumBuckets())
       grow(Size);
   }
 
   void clear() {
+    incrementEpoch();
     if (getNumEntries() == 0 && getNumTombstones() == 0) return;
 
     // If the capacity of the array is huge, and the # elements used is small,
@@ -118,13 +122,13 @@ public:
   iterator find(const KeyT &Val) {
     BucketT *TheBucket;
     if (LookupBucketFor(Val, TheBucket))
-      return iterator(TheBucket, getBucketsEnd(), true);
+      return iterator(TheBucket, getBucketsEnd(), *this, true);
     return end();
   }
   const_iterator find(const KeyT &Val) const {
     const BucketT *TheBucket;
     if (LookupBucketFor(Val, TheBucket))
-      return const_iterator(TheBucket, getBucketsEnd(), true);
+      return const_iterator(TheBucket, getBucketsEnd(), *this, true);
     return end();
   }
 
@@ -137,14 +141,14 @@ public:
   iterator find_as(const LookupKeyT &Val) {
     BucketT *TheBucket;
     if (LookupBucketFor(Val, TheBucket))
-      return iterator(TheBucket, getBucketsEnd(), true);
+      return iterator(TheBucket, getBucketsEnd(), *this, true);
     return end();
   }
   template<class LookupKeyT>
   const_iterator find_as(const LookupKeyT &Val) const {
     const BucketT *TheBucket;
     if (LookupBucketFor(Val, TheBucket))
-      return const_iterator(TheBucket, getBucketsEnd(), true);
+      return const_iterator(TheBucket, getBucketsEnd(), *this, true);
     return end();
   }
 
@@ -163,12 +167,13 @@ public:
   std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
     BucketT *TheBucket;
     if (LookupBucketFor(KV.first, TheBucket))
-      return std::make_pair(iterator(TheBucket, getBucketsEnd(), true),
+      return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
                             false); // Already in map.
 
     // Otherwise, insert the new element.
     TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket);
-    return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true);
+    return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
+                          true);
   }
 
   // Inserts key,value pair into the map if the key isn't already in the map.
@@ -177,14 +182,15 @@ public:
   std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
     BucketT *TheBucket;
     if (LookupBucketFor(KV.first, TheBucket))
-      return std::make_pair(iterator(TheBucket, getBucketsEnd(), true),
+      return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
                             false); // Already in map.
-    
+
     // Otherwise, insert the new element.
     TheBucket = InsertIntoBucket(std::move(KV.first),
                                  std::move(KV.second),
                                  TheBucket);
-    return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true);
+    return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
+                          true);
   }
 
   /// insert - Range insertion of pairs.
@@ -431,6 +437,8 @@ private:
   }
 
   BucketT *InsertIntoBucketImpl(const KeyT &Key, BucketT *TheBucket) {
+    incrementEpoch();
+
     // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
     // the buckets are empty (meaning that many are filled with tombstones),
     // grow the table.
@@ -987,9 +995,10 @@ private:
 
 template <typename KeyT, typename ValueT, typename KeyInfoT, typename Bucket,
           bool IsConst>
-class DenseMapIterator {
+class DenseMapIterator : DebugEpochBase::HandleBase {
   typedef DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true> ConstIterator;
   friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>;
+  friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, false>;
 
 public:
   typedef ptrdiff_t difference_type;
@@ -1003,8 +1012,10 @@ private:
 public:
   DenseMapIterator() : Ptr(nullptr), End(nullptr) {}
 
-  DenseMapIterator(pointer Pos, pointer E, bool NoAdvance = false)
-    : Ptr(Pos), End(E) {
+  DenseMapIterator(pointer Pos, pointer E, const DebugEpochBase &Epoch,
+                   bool NoAdvance = false)
+      : DebugEpochBase::HandleBase(&Epoch), Ptr(Pos), End(E) {
+    assert(isHandleInSync() && "invalid construction!");
     if (!NoAdvance) AdvancePastEmptyBuckets();
   }
 
@@ -1015,28 +1026,40 @@ public:
             typename = typename std::enable_if<!IsConstSrc && IsConst>::type>
   DenseMapIterator(
       const DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, IsConstSrc> &I)
-      : Ptr(I.Ptr), End(I.End) {}
+      : DebugEpochBase::HandleBase(I), Ptr(I.Ptr), End(I.End) {}
 
   reference operator*() const {
+    assert(isHandleInSync() && "invalid iterator access!");
     return *Ptr;
   }
   pointer operator->() const {
+    assert(isHandleInSync() && "invalid iterator access!");
     return Ptr;
   }
 
   bool operator==(const ConstIterator &RHS) const {
-    return Ptr == RHS.operator->();
+    assert((!Ptr || isHandleInSync()) && "handle not in sync!");
+    assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!");
+    assert(getEpochAddress() == RHS.getEpochAddress() &&
+           "comparing incomparable iterators!");
+    return Ptr == RHS.Ptr;
   }
   bool operator!=(const ConstIterator &RHS) const {
-    return Ptr != RHS.operator->();
+    assert((!Ptr || isHandleInSync()) && "handle not in sync!");
+    assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!");
+    assert(getEpochAddress() == RHS.getEpochAddress() &&
+           "comparing incomparable iterators!");
+    return Ptr != RHS.Ptr;
   }
 
   inline DenseMapIterator& operator++() {  // Preincrement
+    assert(isHandleInSync() && "invalid iterator access!");
     ++Ptr;
     AdvancePastEmptyBuckets();
     return *this;
   }
   DenseMapIterator operator++(int) {  // Postincrement
+    assert(isHandleInSync() && "invalid iterator access!");
     DenseMapIterator tmp = *this; ++*this; return tmp;
   }
 

Added: llvm/trunk/include/llvm/ADT/EpochTracker.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/EpochTracker.h?rev=231125&view=auto
==============================================================================
--- llvm/trunk/include/llvm/ADT/EpochTracker.h (added)
+++ llvm/trunk/include/llvm/ADT/EpochTracker.h Tue Mar  3 14:46:45 2015
@@ -0,0 +1,95 @@
+//===- llvm/ADT/EpochTracker.h - ADT epoch tracking --------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes.
+// These can be used to write iterators that are fail-fast when LLVM is built
+// with asserts enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_EPOCH_TRACKER_H
+#define LLVM_ADT_EPOCH_TRACKER_H
+
+#include <cstdint>
+
+namespace llvm {
+
+#ifdef NDEBUG
+
+class DebugEpochBase {
+public:
+  void incrementEpoch() {}
+
+  class HandleBase {
+  public:
+    HandleBase() {}
+    explicit HandleBase(const DebugEpochBase *) {}
+    bool isHandleInSync() { return true; }
+  };
+};
+
+#else
+
+/// \brief A base class for data structure classes wishing to make iterators
+/// ("handles") pointing into themselves fail-fast.  When building without
+/// asserts, this class is empty and does nothing.
+///
+/// DebugEpochBase does not by itself track handles pointing into itself.  The
+/// expectation is that routines touching the handles will poll on
+/// isHandleInSync at appropriate points to assert that the handle they're using
+/// is still valid.
+///
+class DebugEpochBase {
+  uint64_t Epoch;
+
+public:
+  DebugEpochBase() : Epoch(0) {}
+
+  /// \brief Calling incrementEpoch invalidates all handles pointing into the
+  /// calling instance.
+  void incrementEpoch() { ++Epoch; }
+
+  /// \brief The destructor calls incrementEpoch to make use-after-free bugs
+  /// more likely to crash deterministically.
+  ~DebugEpochBase() { incrementEpoch(); }
+
+  /// \brief A base class for iterator classes ("handles") that wish to poll for
+  /// iterator invalidating modifications in the underlying data structure.
+  /// When LLVM is built without asserts, this class is empty and does nothing.
+  ///
+  /// HandleBase does not track the parent data structure by itself.  It expects
+  /// the routines modifying the data structure to call incrementEpoch when they
+  /// make an iterator-invalidating modification.
+  ///
+  class HandleBase {
+    const uint64_t *EpochAddress;
+    uint64_t EpochAtCreation;
+
+  public:
+    HandleBase() : EpochAddress(nullptr), EpochAtCreation(UINT64_MAX) {}
+
+    explicit HandleBase(const DebugEpochBase *Parent)
+        : EpochAddress(&Parent->Epoch), EpochAtCreation(Parent->Epoch) {}
+
+    /// \brief Returns true if the DebugEpochBase this Handle is linked to has
+    /// not called incrementEpoch on itself since the creation of this
+    /// HandleBase instance.
+    bool isHandleInSync() const { return *EpochAddress == EpochAtCreation; }
+
+    /// \brief Returns a pointer to the epoch word stored in the data structure
+    /// this handle points into.
+    const uint64_t *getEpochAddress() const { return EpochAddress; }
+  };
+};
+
+#endif
+
+} // namespace llvm
+
+#endif





More information about the llvm-commits mailing list