summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/esan/esan_hashtable.h250
-rw-r--r--test/esan/Unit/hashtable.cpp134
2 files changed, 384 insertions, 0 deletions
diff --git a/lib/esan/esan_hashtable.h b/lib/esan/esan_hashtable.h
new file mode 100644
index 000000000..52afcfd21
--- /dev/null
+++ b/lib/esan/esan_hashtable.h
@@ -0,0 +1,250 @@
+//===-- esan_hashtable.h ----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Generic resizing hashtable.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include <stddef.h>
+
+namespace __esan {
+
+//===----------------------------------------------------------------------===//
+// Default hash and comparison functions
+//===----------------------------------------------------------------------===//
+
+template <typename T> struct DefaultHash {
+ size_t operator()(const T &Key) const {
+ return (size_t)Key;
+ }
+};
+
+template <typename T> struct DefaultEqual {
+ bool operator()(const T &Key1, const T &Key2) const {
+ return Key1 == Key2;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// HashTable declaration
+//===----------------------------------------------------------------------===//
+
+// A simple resizing and mutex-locked hashtable.
+//
+// If the default hash functor is used, KeyTy must have an operator size_t().
+// If the default comparison functor is used, KeyTy must have an operator ==.
+//
+// By default all operations are internally-synchronized with a mutex, with no
+// synchronization for payloads once hashtable functions return. If
+// ExternalLock is set to true, the caller should call the lock() and unlock()
+// routines around all hashtable operations and subsequent manipulation of
+// payloads.
+template <typename KeyTy, typename DataTy, bool ExternalLock = false,
+ typename HashFuncTy = DefaultHash<KeyTy>,
+ typename EqualFuncTy = DefaultEqual<KeyTy> >
+class HashTable {
+public:
+ // InitialCapacity must be a power of 2.
+ // ResizeFactor must be between 1 and 99 and indicates the
+ // maximum percentage full that the table should ever be.
+ HashTable(u32 InitialCapacity = 2048, u32 ResizeFactor = 70);
+ ~HashTable();
+ bool lookup(const KeyTy &Key, DataTy &Payload); // Const except for Mutex.
+ bool add(const KeyTy &Key, const DataTy &Payload);
+ bool remove(const KeyTy &Key);
+ u32 size(); // Const except for Mutex.
+ // If the table is internally-synchronized, this lock must not be held
+ // while a hashtable function is called as it will deadlock: the lock
+ // is not recursive. This is meant for use with externally-synchronized
+ // tables.
+ void lock();
+ void unlock();
+
+private:
+ void resize();
+
+ struct HashEntry {
+ KeyTy Key;
+ DataTy Payload;
+ HashEntry *Next;
+ };
+
+ HashEntry **Table;
+ u32 Capacity;
+ u32 Entries;
+ const u32 ResizeFactor;
+ BlockingMutex Mutex;
+ const HashFuncTy HashFunc;
+ const EqualFuncTy EqualFunc;
+};
+
+//===----------------------------------------------------------------------===//
+// Hashtable implementation
+//===----------------------------------------------------------------------===//
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::HashTable(
+ u32 InitialCapacity, u32 ResizeFactor)
+ : Capacity(InitialCapacity), Entries(0), ResizeFactor(ResizeFactor),
+ HashFunc(HashFuncTy()), EqualFunc(EqualFuncTy()) {
+ CHECK(IsPowerOfTwo(Capacity));
+ CHECK(ResizeFactor >= 1 && ResizeFactor <= 99);
+ Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+ internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::~HashTable() {
+ for (u32 i = 0; i < Capacity; ++i) {
+ HashEntry *Entry = Table[i];
+ while (Entry != nullptr) {
+ HashEntry *Next = Entry->Next;
+ Entry->Payload.~DataTy();
+ InternalFree(Entry);
+ Entry = Next;
+ }
+ }
+ InternalFree(Table);
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+u32 HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::size() {
+ u32 Res;
+ if (!ExternalLock)
+ Mutex.Lock();
+ Res = Entries;
+ if (!ExternalLock)
+ Mutex.Unlock();
+ return Res;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lookup(
+ const KeyTy &Key, DataTy &Payload) {
+ if (!ExternalLock)
+ Mutex.Lock();
+ bool Found = false;
+ size_t Hash = HashFunc(Key) % Capacity;
+ HashEntry *Entry = Table[Hash];
+ for (; Entry != nullptr; Entry = Entry->Next) {
+ if (EqualFunc(Entry->Key, Key)) {
+ Payload = Entry->Payload;
+ Found = true;
+ break;
+ }
+ }
+ if (!ExternalLock)
+ Mutex.Unlock();
+ return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::resize() {
+ if (!ExternalLock)
+ Mutex.CheckLocked();
+ size_t OldCapacity = Capacity;
+ HashEntry **OldTable = Table;
+ Capacity *= 2;
+ Table = (HashEntry **)InternalAlloc(Capacity * sizeof(HashEntry *));
+ internal_memset(Table, 0, Capacity * sizeof(HashEntry *));
+ // Re-hash
+ for (u32 i = 0; i < OldCapacity; ++i) {
+ HashEntry *OldEntry = OldTable[i];
+ while (OldEntry != nullptr) {
+ HashEntry *Next = OldEntry->Next;
+ size_t Hash = HashFunc(OldEntry->Key) % Capacity;
+ OldEntry->Next = Table[Hash];
+ Table[Hash] = OldEntry;
+ OldEntry = Next;
+ }
+ }
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::add(
+ const KeyTy &Key, const DataTy &Payload) {
+ if (!ExternalLock)
+ Mutex.Lock();
+ bool Exists = false;
+ size_t Hash = HashFunc(Key) % Capacity;
+ HashEntry *Entry = Table[Hash];
+ for (; Entry != nullptr; Entry = Entry->Next) {
+ if (EqualFunc(Entry->Key, Key)) {
+ Exists = true;
+ break;
+ }
+ }
+ if (!Exists) {
+ Entries++;
+ if (Entries * 100 >= Capacity * ResizeFactor) {
+ resize();
+ Hash = HashFunc(Key) % Capacity;
+ }
+ HashEntry *Add = (HashEntry *)InternalAlloc(sizeof(*Add));
+ Add->Key = Key;
+ Add->Payload = Payload;
+ Add->Next = Table[Hash];
+ Table[Hash] = Add;
+ }
+ if (!ExternalLock)
+ Mutex.Unlock();
+ return !Exists;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+bool HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::remove(
+ const KeyTy &Key) {
+ if (!ExternalLock)
+ Mutex.Lock();
+ bool Found = false;
+ size_t Hash = HashFunc(Key) % Capacity;
+ HashEntry *Entry = Table[Hash];
+ HashEntry *Prev = nullptr;
+ for (; Entry != nullptr; Prev = Entry, Entry = Entry->Next) {
+ if (EqualFunc(Entry->Key, Key)) {
+ Found = true;
+ Entries--;
+ if (Prev == nullptr)
+ Table[Hash] = Entry->Next;
+ else
+ Prev->Next = Entry->Next;
+ Entry->Payload.~DataTy();
+ InternalFree(Entry);
+ break;
+ }
+ }
+ if (!ExternalLock)
+ Mutex.Unlock();
+ return Found;
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::lock() {
+ Mutex.Lock();
+}
+
+template <typename KeyTy, typename DataTy, bool ExternalLock,
+ typename HashFuncTy, typename EqualFuncTy>
+void HashTable<KeyTy, DataTy, ExternalLock, HashFuncTy, EqualFuncTy>::unlock() {
+ Mutex.Unlock();
+}
+
+} // namespace __esan
diff --git a/test/esan/Unit/hashtable.cpp b/test/esan/Unit/hashtable.cpp
new file mode 100644
index 000000000..5095be691
--- /dev/null
+++ b/test/esan/Unit/hashtable.cpp
@@ -0,0 +1,134 @@
+// RUN: %clangxx_unit -esan-instrument-loads-and-stores=0 -O0 %s -o %t 2>&1
+// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s
+
+#include "esan/esan_hashtable.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+class MyData {
+ public:
+ MyData(const char *Str) : RefCount(0) { Buf = strdup(Str); }
+ ~MyData() {
+ fprintf(stderr, " Destructor: %s.\n", Buf);
+ free(Buf);
+ }
+ bool operator==(MyData &Cmp) { return strcmp(Buf, Cmp.Buf) == 0; }
+ operator size_t() const {
+ size_t Res = 0;
+ for (int i = 0; i < strlen(Buf); ++i)
+ Res ^= Buf[i];
+ return Res;
+ }
+ char *Buf;
+ int RefCount;
+};
+
+// We use a smart pointer wrapper to free the payload on hashtable removal.
+struct MyDataPayload {
+ MyDataPayload() : Data(nullptr) {}
+ explicit MyDataPayload(MyData *Data) : Data(Data) { ++Data->RefCount; }
+ ~MyDataPayload() {
+ if (Data && --Data->RefCount == 0) {
+ fprintf(stderr, "Deleting %s.\n", Data->Buf);
+ delete Data;
+ }
+ }
+ MyDataPayload(const MyDataPayload &Copy) {
+ Data = Copy.Data;
+ ++Data->RefCount;
+ }
+ MyDataPayload & operator=(const MyDataPayload &Copy) {
+ if (this != &Copy) {
+ this->~MyDataPayload();
+ Data = Copy.Data;
+ ++Data->RefCount;
+ }
+ return *this;
+ }
+ bool operator==(MyDataPayload &Cmp) { return *Data == *Cmp.Data; }
+ operator size_t() const { return (size_t)*Data; }
+ MyData *Data;
+};
+
+int main()
+{
+ __esan::HashTable<int, int> IntTable;
+ assert(IntTable.size() == 0);
+ bool Added = IntTable.add(4, 42);
+ assert(Added);
+ assert(!IntTable.add(4, 42));
+ assert(IntTable.size() == 1);
+ int Value;
+ bool Found = IntTable.lookup(4, Value);
+ assert(Found && Value == 42);
+ assert(!IntTable.remove(5));
+ assert(IntTable.remove(4));
+
+ __esan::HashTable<int, MyDataPayload> DataTable(4);
+ MyDataPayload NewData(new MyData("mystring"));
+ Added = DataTable.add(4, NewData);
+ assert(Added);
+ MyDataPayload FoundData;
+ Found = DataTable.lookup(4, FoundData);
+ assert(Found && strcmp(FoundData.Data->Buf, "mystring") == 0);
+ assert(!DataTable.remove(5));
+ assert(DataTable.remove(4));
+ // Test resize.
+ for (int i = 0; i < 4; ++i) {
+ MyDataPayload MoreData(new MyData("delete-at-end"));
+ Added = DataTable.add(i+1, MoreData);
+ assert(Added);
+ assert(!DataTable.add(i+1, MoreData));
+ }
+ for (int i = 0; i < 4; ++i) {
+ Found = DataTable.lookup(i+1, FoundData);
+ assert(Found && strcmp(FoundData.Data->Buf, "delete-at-end") == 0);
+ }
+
+ // Test payload freeing via smart pointer wrapper.
+ __esan::HashTable<MyDataPayload, MyDataPayload, true> DataKeyTable;
+ MyDataPayload DataA(new MyData("string AB"));
+ DataKeyTable.lock();
+ Added = DataKeyTable.add(DataA, DataA);
+ assert(Added);
+ Found = DataKeyTable.lookup(DataA, FoundData);
+ assert(Found && strcmp(FoundData.Data->Buf, "string AB") == 0);
+ MyDataPayload DataB(new MyData("string AB"));
+ Added = DataKeyTable.add(DataB, DataB);
+ assert(!Added);
+ DataKeyTable.remove(DataB); // Should free the DataA payload.
+ DataKeyTable.unlock();
+
+ // Test custom functors.
+ struct CustomHash {
+ size_t operator()(int Key) const { return Key % 4; }
+ };
+ struct CustomEqual {
+ bool operator()(int Key1, int Key2) const { return Key1 %4 == Key2 % 4; }
+ };
+ __esan::HashTable<int, int, false, CustomHash, CustomEqual> ModTable;
+ Added = ModTable.add(2, 42);
+ assert(Added);
+ Added = ModTable.add(6, 42);
+ assert(!Added);
+
+ fprintf(stderr, "All checks passed.\n");
+ return 0;
+}
+// CHECK: Deleting mystring.
+// CHECK-NEXT: Destructor: mystring.
+// CHECK-NEXT: All checks passed.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT: Destructor: string AB.
+// CHECK-NEXT: Deleting string AB.
+// CHECK-NEXT: Destructor: string AB.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT: Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT: Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT: Destructor: delete-at-end.
+// CHECK-NEXT: Deleting delete-at-end.
+// CHECK-NEXT: Destructor: delete-at-end.