[compiler-rt] r278024 - [esan] Add generic resizing hashtable

Derek Bruening via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 8 10:25:40 PDT 2016


Author: bruening
Date: Mon Aug  8 12:25:40 2016
New Revision: 278024

URL: http://llvm.org/viewvc/llvm-project?rev=278024&view=rev
Log:
[esan] Add generic resizing hashtable

Summary:
Adds a new, generic, resizing hashtable data structure for use by esan
tools.  No existing sanitizer hashtable is suitable for the use case for
most esan tools: we need non-fixed-size tables, parameterized keys and
payloads, and write access to payloads.  The new hashtable uses either
simple internal or external mutex locking and supports custom hash and
comparision operators.  The focus is on functionality, not performance, to
catalyze creation of a variety of tools.  We can optimize the more
successful tools later.

Adds tests of the data structure.

Reviewers: aizatsky

Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits, kubabrecka

Differential Revision: https://reviews.llvm.org/D22681

Added:
    compiler-rt/trunk/lib/esan/esan_hashtable.h   (with props)
    compiler-rt/trunk/test/esan/Unit/hashtable.cpp   (with props)

Added: compiler-rt/trunk/lib/esan/esan_hashtable.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/esan/esan_hashtable.h?rev=278024&view=auto
==============================================================================
--- compiler-rt/trunk/lib/esan/esan_hashtable.h (added)
+++ compiler-rt/trunk/lib/esan/esan_hashtable.h Mon Aug  8 12:25:40 2016
@@ -0,0 +1,250 @@
+//===-- esan_hashtable.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Generic resizing hashtable.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <stddef.h>
+
+namespace __esan {
+
+//===----------------------------------------------------------------------===//
+// Default hash and comparison functions
+//===----------------------------------------------------------------------===//
+
+template <typename T> struct DefaultHash {
+  size_t operator()(const T &Key) const {
+    return (size_t)Key;
+  }
+};
+
+template <typename T> struct DefaultEqual {
+  bool operator()(const T &Key1, const T &Key2) const {
+    return Key1 == Key2;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// HashTable declaration
+//===----------------------------------------------------------------------===//
+
+// A simple resizing and mutex-locked hashtable.
+//
+// If the default hash functor is used, KeyTy must have an operator size_t().
+// If the default comparison functor is used, KeyTy must have an operator ==.
+//
+// By default all operations are internally-synchronized with a mutex, with no
+// synchronization for payloads once hashtable functions return.  If
+// ExternalLock is set to true, the caller should call the lock() and unlock()
+// routines around all hashtable operations and subsequent manipulation of
+// payloads.
+template <typename KeyTy, typename DataTy, bool ExternalLock = false,
+          typename HashFuncTy = DefaultHash<KeyTy>,
+          typename EqualFuncTy = DefaultEqual<KeyTy> >
+class HashTable {
+public:
+  // InitialCapacity must be a power of 2.
+  // ResizeFactor must be between 1 and 99 and indicates the
+  // maximum percentage full that the table should ever be.
+  HashTable(u32 InitialCapacity = 2048, u32 ResizeFactor = 70);
+  ~HashTable();
+  bool lookup(const KeyTy &Key, DataTy &Payload); // Const except for Mutex.
+  bool add(const KeyTy &Key, const DataTy &Payload);
+  bool remove(const KeyTy &Key);
+  u32 size(); // Const except for Mutex.
+  // If the table is internally-synchronized, this lock must not be held
+  // while a hashtable function is called as it will deadlock: the lock
+  // is not recursive.  This is meant for use with externally-synchronized
+  // tables.
+  void lock();
+  void unlock();
+
+private:
+  void resize();
+
+  struct HashEntry {
+    KeyTy Key;
+    DataTy Payload;
+    HashEntry *Next;
+  };
+
+  HashEntry **Table;
+  u32 Capacity;
+  u32 Entries;
+  const u32 ResizeFactor;
+  BlockingMutex Mutex;
+  const HashFuncTy HashFunc;
+  const EqualFuncTy EqualFunc;
+};
+
+//===----------------------------------------------------------------------===//
+// Hashtable implementation
+//===----------------------------------------------------------------------===//
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::HashTable(
+    u32 InitialCapacity, u32 ResizeFactor)
+    : Capacity(InitialCapacity), Entries(0), ResizeFactor(ResizeFactor),
+      HashFunc(HashFuncTy()), EqualFunc(EqualFuncTy()) {
+  CHECK(IsPowerOfTwo(Capacity));
+  CHECK(ResizeFactor >= 1 && ResizeFactor <= 99);
+  Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+  internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::~HashTable() {
+  for (u32 i = 0; i < Capacity; ++i) {
+    HashEntry *Entry = Table[i];
+    while (Entry != nullptr) {
+      HashEntry *Next = Entry->Next;
+      Entry->Payload.~DataTy();
+      InternalFree(Entry);
+      Entry = Next;
+    }
+  }
+  InternalFree(Table);
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+u32 HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::size() {
+  u32 Res;
+  if (!ExternalLock)
+    Mutex.Lock();
+  Res = Entries;
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Res;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lookup(
+    const KeyTy &Key, DataTy &Payload) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Found = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  for (; Entry != nullptr; Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Payload = Entry->Payload;
+      Found = true;
+      break;
+    }
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::resize() {
+  if (!ExternalLock)
+    Mutex.CheckLocked();
+  size_t OldCapacity = Capacity;
+  HashEntry **OldTable = Table;
+  Capacity *= 2;
+  Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+  internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+  // Re-hash
+  for (u32 i = 0; i < OldCapacity; ++i) {
+    HashEntry *OldEntry = OldTable[i];
+    while (OldEntry != nullptr) {
+      HashEntry *Next = OldEntry->Next;
+      size_t Hash = HashFunc(OldEntry->Key) % Capacity;
+      OldEntry->Next = Table[Hash];
+      Table[Hash] = OldEntry;
+      OldEntry = Next;
+    }
+  }
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::add(
+    const KeyTy &Key, const DataTy &Payload) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Exists = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  for (; Entry != nullptr; Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Exists = true;
+      break;
+    }
+  }
+  if (!Exists) {
+    Entries++;
+    if (Entries * 100 >= Capacity * ResizeFactor) {
+      resize();
+      Hash = HashFunc(Key) % Capacity;
+    }
+    HashEntry *Add = (HashEntry *)InternalAlloc(sizeof(*Add));
+    Add->Key = Key;
+    Add->Payload = Payload;
+    Add->Next = Table[Hash];
+    Table[Hash] = Add;
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return !Exists;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::remove(
+    const KeyTy &Key) {
+  if (!ExternalLock)
+    Mutex.Lock();
+  bool Found = false;
+  size_t Hash = HashFunc(Key) % Capacity;
+  HashEntry *Entry = Table[Hash];
+  HashEntry *Prev = nullptr;
+  for (; Entry != nullptr; Prev = Entry, Entry = Entry->Next) {
+    if (EqualFunc(Entry->Key, Key)) {
+      Found = true;
+      Entries--;
+      if (Prev == nullptr)
+        Table[Hash] = Entry->Next;
+      else
+        Prev->Next = Entry->Next;
+      Entry->Payload.~DataTy();
+      InternalFree(Entry);
+      break;
+    }
+  }
+  if (!ExternalLock)
+    Mutex.Unlock();
+  return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lock() {
+  Mutex.Lock();
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+          typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::unlock() {
+  Mutex.Unlock();
+}
+
+} // namespace __esan

Propchange: compiler-rt/trunk/lib/esan/esan_hashtable.h
------------------------------------------------------------------------------
    svn:eol-style = LF

Added: compiler-rt/trunk/test/esan/Unit/hashtable.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/esan/Unit/hashtable.cpp?rev=278024&view=auto
==============================================================================
--- compiler-rt/trunk/test/esan/Unit/hashtable.cpp (added)
+++ compiler-rt/trunk/test/esan/Unit/hashtable.cpp Mon Aug  8 12:25:40 2016
@@ -0,0 +1,134 @@
+// RUN: %clangxx_unit -esan-instrument-loads-and-stores=0 -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s
+
+#include "esan/esan_hashtable.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+class MyData {
+ public:
+  MyData(const char *Str) : RefCount(0) { Buf = strdup(Str); }
+  ~MyData() {
+    fprintf(stderr, "  Destructor: %s.\n", Buf);
+    free(Buf);
+  }
+  bool operator==(MyData &Cmp) { return strcmp(Buf, Cmp.Buf) == 0; }
+  operator size_t() const {
+    size_t Res = 0;
+    for (int i = 0; i < strlen(Buf); ++i)
+      Res ^= Buf[i];
+    return Res;
+  }
+  char *Buf;
+  int RefCount;
+};
+
+// We use a smart pointer wrapper to free the payload on hashtable removal.
+struct MyDataPayload {
+  MyDataPayload() : Data(nullptr) {}
+  explicit MyDataPayload(MyData *Data) : Data(Data) { ++Data->RefCount; }
+  ~MyDataPayload() {
+    if (Data && --Data->RefCount == 0) {
+      fprintf(stderr, "Deleting %s.\n", Data->Buf);
+      delete Data;
+    }
+  }
+  MyDataPayload(const MyDataPayload &Copy) {
+    Data = Copy.Data;
+    ++Data->RefCount;
+  }
+  MyDataPayload & operator=(const MyDataPayload &Copy) {
+    if (this != &Copy) {
+      this->~MyDataPayload();
+      Data = Copy.Data;
+      ++Data->RefCount;
+    }
+    return *this;
+  }
+  bool operator==(MyDataPayload &Cmp) { return *Data == *Cmp.Data; }
+  operator size_t() const { return (size_t)*Data; }
+  MyData *Data;
+};
+
+int main()
+{
+  __esan::HashTable<int, int> IntTable;
+  assert(IntTable.size() == 0);
+  bool Added = IntTable.add(4, 42);
+  assert(Added);
+  assert(!IntTable.add(4, 42));
+  assert(IntTable.size() == 1);
+  int Value;
+  bool Found = IntTable.lookup(4, Value);
+  assert(Found && Value == 42);
+  assert(!IntTable.remove(5));
+  assert(IntTable.remove(4));
+
+  __esan::HashTable<int, MyDataPayload> DataTable(4);
+  MyDataPayload NewData(new MyData("mystring"));
+  Added = DataTable.add(4, NewData);
+  assert(Added);
+  MyDataPayload FoundData;
+  Found = DataTable.lookup(4, FoundData);
+  assert(Found && strcmp(FoundData.Data->Buf, "mystring") == 0);
+  assert(!DataTable.remove(5));
+  assert(DataTable.remove(4));
+  // Test resize.
+  for (int i = 0; i < 4; ++i) {
+    MyDataPayload MoreData(new MyData("delete-at-end"));
+    Added = DataTable.add(i+1, MoreData);
+    assert(Added);
+    assert(!DataTable.add(i+1, MoreData));
+  }
+  for (int i = 0; i < 4; ++i) {
+    Found = DataTable.lookup(i+1, FoundData);
+    assert(Found && strcmp(FoundData.Data->Buf, "delete-at-end") == 0);
+  }
+
+  // Test payload freeing via smart pointer wrapper.
+  __esan::HashTable<MyDataPayload, MyDataPayload, true> DataKeyTable;
+  MyDataPayload DataA(new MyData("string AB"));
+  DataKeyTable.lock();
+  Added = DataKeyTable.add(DataA, DataA);
+  assert(Added);
+  Found = DataKeyTable.lookup(DataA, FoundData);
+  assert(Found && strcmp(FoundData.Data->Buf, "string AB") == 0);
+  MyDataPayload DataB(new MyData("string AB"));
+  Added = DataKeyTable.add(DataB, DataB);
+  assert(!Added);
+  DataKeyTable.remove(DataB); // Should free the DataA payload.
+  DataKeyTable.unlock();
+
+  // Test custom functors.
+  struct CustomHash {
+    size_t operator()(int Key) const { return Key % 4; }
+  };
+  struct CustomEqual {
+    bool operator()(int Key1, int Key2) const { return Key1 %4 == Key2 % 4; }
+  };
+  __esan::HashTable<int, int, false, CustomHash, CustomEqual> ModTable;
+  Added = ModTable.add(2, 42);
+  assert(Added);
+  Added = ModTable.add(6, 42);
+  assert(!Added);
+
+  fprintf(stderr, "All checks passed.\n");
+  return 0;
+}
+// CHECK:      Deleting mystring.
+// CHECK-NEXT:   Destructor: mystring.
+// CHECK-NEXT: All checks passed.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT:   Destructor: string AB.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT:   Destructor: string AB.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT:   Destructor: delete-at-end.

Propchange: compiler-rt/trunk/test/esan/Unit/hashtable.cpp
------------------------------------------------------------------------------
    svn:eol-style = LF




More information about the llvm-commits mailing list