[llvm] r206643 - OnDiskHashTable: Expect the Info type to declare the offset type
Duncan P. N. Exon Smith
dexonsmith at apple.com
Fri Apr 18 16:52:37 PDT 2014
On 2014-Apr-18, at 13:39, Justin Bogner <mail at justinbogner.com> wrote:
> Author: bogner
> Date: Fri Apr 18 15:39:46 2014
> New Revision: 206643
>
> URL: http://llvm.org/viewvc/llvm-project?rev=206643&view=rev
> Log:
> OnDiskHashTable: Expect the Info type to declare the offset type
>
> This changes the on-disk hash to get the type to use for offsets from
> the Info type, so that clients can be more flexible with the size of
> table they support.
>
> Modified:
> llvm/trunk/include/llvm/Support/OnDiskHashTable.h
>
> Modified: llvm/trunk/include/llvm/Support/OnDiskHashTable.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/OnDiskHashTable.h?rev=206643&r1=206642&r2=206643&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Support/OnDiskHashTable.h (original)
> +++ llvm/trunk/include/llvm/Support/OnDiskHashTable.h Fri Apr 18 15:39:46 2014
> @@ -40,11 +40,12 @@ namespace llvm {
> /// typedef ExampleData data_type; // Must be copy constructible
> /// typedef ExampleData &data_type_ref;
> /// typedef uint32_t hash_value_type; // The type the hash function returns.
> +/// typedef uint32_t offset_type; // The type for offsets into the table.
> ///
> /// /// Calculate the hash for Key
> /// static hash_value_type ComputeHash(key_type_ref Key);
> /// /// Return the lengths, in bytes, of the given Key/Data pair.
> -/// static std::pair<unsigned, unsigned>
> +/// static std::pair<offset_type, offset_type>
> /// EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
> /// /// Write Key to Out. KeyLen is the length from EmitKeyDataLength.
> /// static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen);
> @@ -54,8 +55,9 @@ namespace llvm {
> /// };
> /// \endcode
> template <typename Info> class OnDiskChainedHashTableGenerator {
> - unsigned NumBuckets;
> - unsigned NumEntries;
> + typedef typename Info::offset_type offset_type;
> + offset_type NumBuckets;
> + offset_type NumEntries;
> llvm::BumpPtrAllocator BA;
>
> /// \brief A single item in the hash table.
> @@ -74,7 +76,7 @@ template <typename Info> class OnDiskCha
> /// \brief A linked list of values in a particular hash bucket.
> class Bucket {
> public:
> - uint32_t Off;
> + offset_type Off;
> Item *Head;
> unsigned Length;
>
> @@ -96,7 +98,7 @@ private:
> void resize(size_t NewSize) {
> Bucket *NewBuckets = (Bucket *)std::calloc(NewSize, sizeof(Bucket));
> // Populate NewBuckets with the old entries.
> - for (unsigned I = 0; I < NumBuckets; ++I)
> + for (size_t I = 0; I < NumBuckets; ++I)
Should the loop index `I` be offset_type here rather than size_t, given that NumBuckets is now an offset_type?
> for (Item *E = Buckets[I].Head; E;) {
> Item *N = E->Next;
> E->Next = 0;
> @@ -131,7 +133,7 @@ public:
> }
>
> /// \brief Emit the table to Out, which must not be at offset 0.
> - uint32_t Emit(raw_ostream &Out) {
> + offset_type Emit(raw_ostream &Out) {
> Info InfoObj;
> return Emit(Out, InfoObj);
> }
> @@ -139,12 +141,12 @@ public:
> /// \brief Emit the table to Out, which must not be at offset 0.
> ///
> /// Uses the provided Info instead of a stack allocated one.
> - uint32_t Emit(raw_ostream &Out, Info &InfoObj) {
> + offset_type Emit(raw_ostream &Out, Info &InfoObj) {
> using namespace llvm::support;
> endian::Writer<little> LE(Out);
>
> // Emit the payload of the table.
> - for (unsigned I = 0; I < NumBuckets; ++I) {
> + for (size_t I = 0; I < NumBuckets; ++I) {
Same question here: should the loop index `I` be offset_type rather than size_t?
> Bucket &B = Buckets[I];
> if (!B.Head)
> continue;
> @@ -160,7 +162,7 @@ public:
> // Write out the entries in the bucket.
> for (Item *I = B.Head; I; I = I->Next) {
> LE.write<typename Info::hash_value_type>(I->Hash);
> - const std::pair<unsigned, unsigned> &Len =
> + const std::pair<offset_type, offset_type> &Len =
> InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
> InfoObj.EmitKey(Out, I->Key, Len.first);
> InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
> @@ -168,17 +170,17 @@ public:
> }
>
> // Pad with zeros so that we can start the hashtable at an aligned address.
> - uint32_t TableOff = Out.tell();
> - uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<uint32_t>());
> + offset_type TableOff = Out.tell();
> + uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<offset_type>());
> TableOff += N;
> while (N--)
> LE.write<uint8_t>(0);
>
> // Emit the hashtable itself.
> - LE.write<uint32_t>(NumBuckets);
> - LE.write<uint32_t>(NumEntries);
> - for (unsigned I = 0; I < NumBuckets; ++I)
> - LE.write<uint32_t>(Buckets[I].Off);
> + LE.write<offset_type>(NumBuckets);
> + LE.write<offset_type>(NumEntries);
> + for (size_t I = 0; I < NumBuckets; ++I)
> + LE.write<offset_type>(Buckets[I].Off);
>
> return TableOff;
> }
> @@ -207,6 +209,7 @@ public:
> /// typedef ExampleInternalKey internal_key_type; // The stored key type.
> /// typedef ExampleKey external_key_type; // The type to pass to find().
> /// typedef uint32_t hash_value_type; // The type the hash function returns.
> +/// typedef uint32_t offset_type; // The type for offsets into the table.
> ///
> /// /// Compare two keys for equality.
> /// static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
> @@ -219,7 +222,7 @@ public:
> /// static const internal_key_type &GetInternalKey(external_key_type &EKey);
> /// /// Read the key and data length from Buffer, leaving it pointing at the
> /// /// following byte.
> -/// static std::pair<unsigned, unsigned>
> +/// static std::pair<offset_type, offset_type>
> /// ReadKeyDataLength(const unsigned char *&Buffer);
> /// /// Read the key from Buffer, given the KeyLen as reported from
> /// /// ReadKeyDataLength.
> @@ -232,8 +235,8 @@ public:
> /// };
> /// \endcode
> template <typename Info> class OnDiskChainedHashTable {
> - const unsigned NumBuckets;
> - const unsigned NumEntries;
> + const typename Info::offset_type NumBuckets;
> + const typename Info::offset_type NumEntries;
> const unsigned char *const Buckets;
> const unsigned char *const Base;
> Info InfoObj;
> @@ -243,8 +246,9 @@ public:
> typedef typename Info::external_key_type external_key_type;
> typedef typename Info::data_type data_type;
> typedef typename Info::hash_value_type hash_value_type;
> + typedef typename Info::offset_type offset_type;
>
> - OnDiskChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
> + OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
> const unsigned char *Buckets,
> const unsigned char *Base,
> const Info &InfoObj = Info())
> @@ -254,8 +258,8 @@ public:
> "'buckets' must have a 4-byte alignment");
> }
>
> - unsigned getNumBuckets() const { return NumBuckets; }
> - unsigned getNumEntries() const { return NumEntries; }
> + offset_type getNumBuckets() const { return NumBuckets; }
> + offset_type getNumEntries() const { return NumEntries; }
> const unsigned char *getBase() const { return Base; }
> const unsigned char *getBuckets() const { return Buckets; }
>
> @@ -287,11 +291,11 @@ public:
> const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
> hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
>
> - // Each bucket is just a 32-bit offset into the hash table file.
> - unsigned Idx = KeyHash & (NumBuckets - 1);
> - const unsigned char *Bucket = Buckets + sizeof(uint32_t) * Idx;
> + // Each bucket is just an offset into the hash table file.
> + offset_type Idx = KeyHash & (NumBuckets - 1);
> + const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx;
>
> - unsigned Offset = endian::readNext<uint32_t, little, aligned>(Bucket);
> + offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
> if (Offset == 0)
> return iterator(); // Empty bucket.
> const unsigned char *Items = Base + Offset;
> @@ -306,8 +310,9 @@ public:
> endian::readNext<hash_value_type, little, unaligned>(Items);
>
> // Determine the length of the key and the data.
> - const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Items);
> - unsigned ItemLen = L.first + L.second;
> + const std::pair<offset_type, offset_type> &L =
> + Info::ReadKeyDataLength(Items);
> + offset_type ItemLen = L.first + L.second;
>
> // Compare the hashes. If they are not the same, skip the entry entirely.
> if (ItemHash != KeyHash) {
> @@ -353,8 +358,10 @@ public:
> assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
> "buckets should be 4-byte aligned.");
>
> - unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
> - unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
> + offset_type NumBuckets =
> + endian::readNext<offset_type, little, aligned>(Buckets);
> + offset_type NumEntries =
> + endian::readNext<offset_type, little, aligned>(Buckets);
> return new OnDiskChainedHashTable<Info>(NumBuckets, NumEntries, Buckets,
> Base, InfoObj);
> }
> @@ -373,8 +380,9 @@ public:
> typedef typename base_type::external_key_type external_key_type;
> typedef typename base_type::data_type data_type;
> typedef typename base_type::hash_value_type hash_value_type;
> + typedef typename base_type::offset_type offset_type;
>
> - OnDiskIterableChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
> + OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
> const unsigned char *Buckets,
> const unsigned char *Payload,
> const unsigned char *Base,
> @@ -385,14 +393,14 @@ public:
> /// \brief Iterates over all of the keys in the table.
> class key_iterator {
> const unsigned char *Ptr;
> - unsigned NumItemsInBucketLeft;
> - unsigned NumEntriesLeft;
> + offset_type NumItemsInBucketLeft;
> + offset_type NumEntriesLeft;
> Info *InfoObj;
>
> public:
> typedef external_key_type value_type;
>
> - key_iterator(const unsigned char *const Ptr, unsigned NumEntries,
> + key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
> Info *InfoObj)
> : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
> InfoObj(InfoObj) {}
> @@ -416,7 +424,8 @@ public:
> }
> Ptr += sizeof(hash_value_type); // Skip the hash.
> // Determine the length of the key and the data.
> - const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
> + const std::pair<offset_type, offset_type> &L =
> + Info::ReadKeyDataLength(Ptr);
> Ptr += L.first + L.second;
> assert(NumItemsInBucketLeft);
> --NumItemsInBucketLeft;
> @@ -435,7 +444,7 @@ public:
> LocalPtr += sizeof(hash_value_type); // Skip the hash.
>
> // Determine the length of the key and the data.
> - const std::pair<unsigned, unsigned> &L =
> + const std::pair<offset_type, offset_type> &L =
> Info::ReadKeyDataLength(LocalPtr);
>
> // Read the key.
> @@ -456,14 +465,14 @@ public:
> /// \brief Iterates over all the entries in the table, returning the data.
> class data_iterator {
> const unsigned char *Ptr;
> - unsigned NumItemsInBucketLeft;
> - unsigned NumEntriesLeft;
> + offset_type NumItemsInBucketLeft;
> + offset_type NumEntriesLeft;
> Info *InfoObj;
>
> public:
> typedef data_type value_type;
>
> - data_iterator(const unsigned char *const Ptr, unsigned NumEntries,
> + data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
> Info *InfoObj)
> : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
> InfoObj(InfoObj) {}
> @@ -487,7 +496,8 @@ public:
> }
> Ptr += sizeof(hash_value_type); // Skip the hash.
> // Determine the length of the key and the data.
> - const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
> + const std::pair<offset_type, offset_type> &L =
> + Info::ReadKeyDataLength(Ptr);
> Ptr += L.first + L.second;
> assert(NumItemsInBucketLeft);
> --NumItemsInBucketLeft;
> @@ -506,7 +516,7 @@ public:
> LocalPtr += sizeof(hash_value_type); // Skip the hash.
>
> // Determine the length of the key and the data.
> - const std::pair<unsigned, unsigned> &L =
> + const std::pair<offset_type, offset_type> &L =
> Info::ReadKeyDataLength(LocalPtr);
>
> // Read the key.
> @@ -545,8 +555,10 @@ public:
> assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
> "buckets should be 4-byte aligned.");
>
> - unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
> - unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
> + offset_type NumBuckets =
> + endian::readNext<offset_type, little, aligned>(Buckets);
> + offset_type NumEntries =
> + endian::readNext<offset_type, little, aligned>(Buckets);
> return new OnDiskIterableChainedHashTable<Info>(
> NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj);
> }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list