[llvm] r206643 - OnDiskHashTable: Expect the Info type to declare the offset type

Duncan P. N. Exon Smith dexonsmith at apple.com
Fri Apr 18 16:52:37 PDT 2014


On 2014-Apr-18, at 13:39, Justin Bogner <mail at justinbogner.com> wrote:

> Author: bogner
> Date: Fri Apr 18 15:39:46 2014
> New Revision: 206643
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=206643&view=rev
> Log:
> OnDiskHashTable: Expect the Info type to declare the offset type
> 
> This changes the on-disk hash to get the type to use for offsets from
> the Info type, so that clients can be more flexible with the size of
> table they support.
> 
> Modified:
>    llvm/trunk/include/llvm/Support/OnDiskHashTable.h
> 
> Modified: llvm/trunk/include/llvm/Support/OnDiskHashTable.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/OnDiskHashTable.h?rev=206643&r1=206642&r2=206643&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Support/OnDiskHashTable.h (original)
> +++ llvm/trunk/include/llvm/Support/OnDiskHashTable.h Fri Apr 18 15:39:46 2014
> @@ -40,11 +40,12 @@ namespace llvm {
> ///   typedef ExampleData data_type; // Must be copy constructible
> ///   typedef ExampleData &data_type_ref;
> ///   typedef uint32_t hash_value_type; // The type the hash function returns.
> +///   typedef uint32_t offset_type; // The type for offsets into the table.
> ///
> ///   /// Calculate the hash for Key
> ///   static hash_value_type ComputeHash(key_type_ref Key);
> ///   /// Return the lengths, in bytes, of the given Key/Data pair.
> -///   static std::pair<unsigned, unsigned>
> +///   static std::pair<offset_type, offset_type>
> ///   EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
> ///   /// Write Key to Out.  KeyLen is the length from EmitKeyDataLength.
> ///   static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen);
> @@ -54,8 +55,9 @@ namespace llvm {
> /// };
> /// \endcode
> template <typename Info> class OnDiskChainedHashTableGenerator {
> -  unsigned NumBuckets;
> -  unsigned NumEntries;
> +  typedef typename Info::offset_type offset_type;
> +  offset_type NumBuckets;
> +  offset_type NumEntries;
>   llvm::BumpPtrAllocator BA;
> 
>   /// \brief A single item in the hash table.
> @@ -74,7 +76,7 @@ template <typename Info> class OnDiskCha
>   /// \brief A linked list of values in a particular hash bucket.
>   class Bucket {
>   public:
> -    uint32_t Off;
> +    offset_type Off;
>     Item *Head;
>     unsigned Length;
> 
> @@ -96,7 +98,7 @@ private:
>   void resize(size_t NewSize) {
>     Bucket *NewBuckets = (Bucket *)std::calloc(NewSize, sizeof(Bucket));
>     // Populate NewBuckets with the old entries.
> -    for (unsigned I = 0; I < NumBuckets; ++I)
> +    for (size_t I = 0; I < NumBuckets; ++I)

Should this loop index be offset_type rather than size_t, given that NumBuckets is now an offset_type?

>       for (Item *E = Buckets[I].Head; E;) {
>         Item *N = E->Next;
>         E->Next = 0;
> @@ -131,7 +133,7 @@ public:
>   }
> 
>   /// \brief Emit the table to Out, which must not be at offset 0.
> -  uint32_t Emit(raw_ostream &Out) {
> +  offset_type Emit(raw_ostream &Out) {
>     Info InfoObj;
>     return Emit(Out, InfoObj);
>   }
> @@ -139,12 +141,12 @@ public:
>   /// \brief Emit the table to Out, which must not be at offset 0.
>   ///
>   /// Uses the provided Info instead of a stack allocated one.
> -  uint32_t Emit(raw_ostream &Out, Info &InfoObj) {
> +  offset_type Emit(raw_ostream &Out, Info &InfoObj) {
>     using namespace llvm::support;
>     endian::Writer<little> LE(Out);
> 
>     // Emit the payload of the table.
> -    for (unsigned I = 0; I < NumBuckets; ++I) {
> +    for (size_t I = 0; I < NumBuckets; ++I) {

Same question here: should this loop index be offset_type rather than size_t, to match NumBuckets?

>       Bucket &B = Buckets[I];
>       if (!B.Head)
>         continue;
> @@ -160,7 +162,7 @@ public:
>       // Write out the entries in the bucket.
>       for (Item *I = B.Head; I; I = I->Next) {
>         LE.write<typename Info::hash_value_type>(I->Hash);
> -        const std::pair<unsigned, unsigned> &Len =
> +        const std::pair<offset_type, offset_type> &Len =
>             InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
>         InfoObj.EmitKey(Out, I->Key, Len.first);
>         InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
> @@ -168,17 +170,17 @@ public:
>     }
> 
>     // Pad with zeros so that we can start the hashtable at an aligned address.
> -    uint32_t TableOff = Out.tell();
> -    uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<uint32_t>());
> +    offset_type TableOff = Out.tell();
> +    uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<offset_type>());
>     TableOff += N;
>     while (N--)
>       LE.write<uint8_t>(0);
> 
>     // Emit the hashtable itself.
> -    LE.write<uint32_t>(NumBuckets);
> -    LE.write<uint32_t>(NumEntries);
> -    for (unsigned I = 0; I < NumBuckets; ++I)
> -      LE.write<uint32_t>(Buckets[I].Off);
> +    LE.write<offset_type>(NumBuckets);
> +    LE.write<offset_type>(NumEntries);
> +    for (size_t I = 0; I < NumBuckets; ++I)
> +      LE.write<offset_type>(Buckets[I].Off);
> 
>     return TableOff;
>   }
> @@ -207,6 +209,7 @@ public:
> ///   typedef ExampleInternalKey internal_key_type; // The stored key type.
> ///   typedef ExampleKey external_key_type; // The type to pass to find().
> ///   typedef uint32_t hash_value_type; // The type the hash function returns.
> +///   typedef uint32_t offset_type; // The type for offsets into the table.
> ///
> ///   /// Compare two keys for equality.
> ///   static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
> @@ -219,7 +222,7 @@ public:
> ///   static const internal_key_type &GetInternalKey(external_key_type &EKey);
> ///   /// Read the key and data length from Buffer, leaving it pointing at the
> ///   /// following byte.
> -///   static std::pair<unsigned, unsigned>
> +///   static std::pair<offset_type, offset_type>
> ///   ReadKeyDataLength(const unsigned char *&Buffer);
> ///   /// Read the key from Buffer, given the KeyLen as reported from
> ///   /// ReadKeyDataLength.
> @@ -232,8 +235,8 @@ public:
> /// };
> /// \endcode
> template <typename Info> class OnDiskChainedHashTable {
> -  const unsigned NumBuckets;
> -  const unsigned NumEntries;
> +  const typename Info::offset_type NumBuckets;
> +  const typename Info::offset_type NumEntries;
>   const unsigned char *const Buckets;
>   const unsigned char *const Base;
>   Info InfoObj;
> @@ -243,8 +246,9 @@ public:
>   typedef typename Info::external_key_type external_key_type;
>   typedef typename Info::data_type         data_type;
>   typedef typename Info::hash_value_type   hash_value_type;
> +  typedef typename Info::offset_type       offset_type;
> 
> -  OnDiskChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
> +  OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
>                          const unsigned char *Buckets,
>                          const unsigned char *Base,
>                          const Info &InfoObj = Info())
> @@ -254,8 +258,8 @@ public:
>            "'buckets' must have a 4-byte alignment");
>   }
> 
> -  unsigned getNumBuckets() const { return NumBuckets; }
> -  unsigned getNumEntries() const { return NumEntries; }
> +  offset_type getNumBuckets() const { return NumBuckets; }
> +  offset_type getNumEntries() const { return NumEntries; }
>   const unsigned char *getBase() const { return Base; }
>   const unsigned char *getBuckets() const { return Buckets; }
> 
> @@ -287,11 +291,11 @@ public:
>     const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
>     hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
> 
> -    // Each bucket is just a 32-bit offset into the hash table file.
> -    unsigned Idx = KeyHash & (NumBuckets - 1);
> -    const unsigned char *Bucket = Buckets + sizeof(uint32_t) * Idx;
> +    // Each bucket is just an offset into the hash table file.
> +    offset_type Idx = KeyHash & (NumBuckets - 1);
> +    const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx;
> 
> -    unsigned Offset = endian::readNext<uint32_t, little, aligned>(Bucket);
> +    offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
>     if (Offset == 0)
>       return iterator(); // Empty bucket.
>     const unsigned char *Items = Base + Offset;
> @@ -306,8 +310,9 @@ public:
>           endian::readNext<hash_value_type, little, unaligned>(Items);
> 
>       // Determine the length of the key and the data.
> -      const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Items);
> -      unsigned ItemLen = L.first + L.second;
> +      const std::pair<offset_type, offset_type> &L =
> +          Info::ReadKeyDataLength(Items);
> +      offset_type ItemLen = L.first + L.second;
> 
>       // Compare the hashes.  If they are not the same, skip the entry entirely.
>       if (ItemHash != KeyHash) {
> @@ -353,8 +358,10 @@ public:
>     assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
>            "buckets should be 4-byte aligned.");
> 
> -    unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
> -    unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
> +    offset_type NumBuckets =
> +        endian::readNext<offset_type, little, aligned>(Buckets);
> +    offset_type NumEntries =
> +        endian::readNext<offset_type, little, aligned>(Buckets);
>     return new OnDiskChainedHashTable<Info>(NumBuckets, NumEntries, Buckets,
>                                             Base, InfoObj);
>   }
> @@ -373,8 +380,9 @@ public:
>   typedef typename base_type::external_key_type external_key_type;
>   typedef typename base_type::data_type         data_type;
>   typedef typename base_type::hash_value_type   hash_value_type;
> +  typedef typename base_type::offset_type       offset_type;
> 
> -  OnDiskIterableChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
> +  OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
>                                  const unsigned char *Buckets,
>                                  const unsigned char *Payload,
>                                  const unsigned char *Base,
> @@ -385,14 +393,14 @@ public:
>   /// \brief Iterates over all of the keys in the table.
>   class key_iterator {
>     const unsigned char *Ptr;
> -    unsigned NumItemsInBucketLeft;
> -    unsigned NumEntriesLeft;
> +    offset_type NumItemsInBucketLeft;
> +    offset_type NumEntriesLeft;
>     Info *InfoObj;
> 
>   public:
>     typedef external_key_type value_type;
> 
> -    key_iterator(const unsigned char *const Ptr, unsigned NumEntries,
> +    key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
>                  Info *InfoObj)
>         : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
>           InfoObj(InfoObj) {}
> @@ -416,7 +424,8 @@ public:
>       }
>       Ptr += sizeof(hash_value_type); // Skip the hash.
>       // Determine the length of the key and the data.
> -      const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
> +      const std::pair<offset_type, offset_type> &L =
> +          Info::ReadKeyDataLength(Ptr);
>       Ptr += L.first + L.second;
>       assert(NumItemsInBucketLeft);
>       --NumItemsInBucketLeft;
> @@ -435,7 +444,7 @@ public:
>       LocalPtr += sizeof(hash_value_type); // Skip the hash.
> 
>       // Determine the length of the key and the data.
> -      const std::pair<unsigned, unsigned> &L =
> +      const std::pair<offset_type, offset_type> &L =
>           Info::ReadKeyDataLength(LocalPtr);
> 
>       // Read the key.
> @@ -456,14 +465,14 @@ public:
>   /// \brief Iterates over all the entries in the table, returning the data.
>   class data_iterator {
>     const unsigned char *Ptr;
> -    unsigned NumItemsInBucketLeft;
> -    unsigned NumEntriesLeft;
> +    offset_type NumItemsInBucketLeft;
> +    offset_type NumEntriesLeft;
>     Info *InfoObj;
> 
>   public:
>     typedef data_type value_type;
> 
> -    data_iterator(const unsigned char *const Ptr, unsigned NumEntries,
> +    data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
>                   Info *InfoObj)
>         : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
>           InfoObj(InfoObj) {}
> @@ -487,7 +496,8 @@ public:
>       }
>       Ptr += sizeof(hash_value_type); // Skip the hash.
>       // Determine the length of the key and the data.
> -      const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
> +      const std::pair<offset_type, offset_type> &L =
> +          Info::ReadKeyDataLength(Ptr);
>       Ptr += L.first + L.second;
>       assert(NumItemsInBucketLeft);
>       --NumItemsInBucketLeft;
> @@ -506,7 +516,7 @@ public:
>       LocalPtr += sizeof(hash_value_type); // Skip the hash.
> 
>       // Determine the length of the key and the data.
> -      const std::pair<unsigned, unsigned> &L =
> +      const std::pair<offset_type, offset_type> &L =
>           Info::ReadKeyDataLength(LocalPtr);
> 
>       // Read the key.
> @@ -545,8 +555,10 @@ public:
>     assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
>            "buckets should be 4-byte aligned.");
> 
> -    unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
> -    unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
> +    offset_type NumBuckets =
> +        endian::readNext<offset_type, little, aligned>(Buckets);
> +    offset_type NumEntries =
> +        endian::readNext<offset_type, little, aligned>(Buckets);
>     return new OnDiskIterableChainedHashTable<Info>(
>         NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj);
>   }
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list