//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

namespace llvm {

class BinaryStreamReader;
class BinaryStreamWriter;

namespace pdb {

Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);

template <typename ValueT> class HashTable;

template <typename ValueT>
class HashTableIterator
    : public iterator_facade_base<HashTableIterator<ValueT>,
                                  std::forward_iterator_tag,
                                  const std::pair<uint32_t, ValueT>> {
  friend HashTable<ValueT>;

  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, bool IsEnd)
      : Map(&Map), Index(Index), IsEnd(IsEnd) {}

public:
  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
    int I = Map.Present.find_first();
    if (I == -1) {
      Index = 0;
      IsEnd = true;
    } else {
      Index = static_cast<uint32_t>(I);
      IsEnd = false;
    }
  }

  HashTableIterator(const HashTableIterator &R) = default;
  HashTableIterator &operator=(const HashTableIterator &R) {
    Map = R.Map;
    Index = R.Index;
    IsEnd = R.IsEnd;
    return *this;
  }
  bool operator==(const HashTableIterator &R) const {
    if (IsEnd && R.IsEnd)
      return true;
    if (IsEnd != R.IsEnd)
      return false;

    return (Map == R.Map) && (Index == R.Index);
  }
  const std::pair<uint32_t, ValueT> &operator*() const {
    assert(Map->Present.test(Index));
    return Map->Buckets[Index];
  }

  // Implement postfix op++ in terms of prefix op++ by using the superclass
  // implementation.
  using iterator_facade_base<HashTableIterator<ValueT>,
                             std::forward_iterator_tag,
                             const std::pair<uint32_t, ValueT>>::operator++;
  HashTableIterator &operator++() {
    while (Index < Map->Buckets.size()) {
      ++Index;
      if (Map->Present.test(Index))
        return *this;
    }

    IsEnd = true;
    return *this;
  }

private:
  bool isEnd() const { return IsEnd; }
  uint32_t index() const { return Index; }

  const HashTable<ValueT> *Map;
  uint32_t Index;
  bool IsEnd;
};

template <typename ValueT>
class HashTable {
  struct Header {
    support::ulittle32_t Size;
    support::ulittle32_t Capacity;
  };

  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;

public:
  using const_iterator = HashTableIterator<ValueT>;
  friend const_iterator;

  HashTable() { Buckets.resize(8); }
  explicit HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }

  Error load(BinaryStreamReader &Stream) {
    const Header *H;
    if (auto EC = Stream.readObject(H))
      return EC;
    if (H->Capacity == 0)
      return make_error<RawError>(raw_error_code::corrupt_file,
                                  "Invalid Hash Table Capacity");
    if (H->Size > maxLoad(H->Capacity))
      return make_error<RawError>(raw_error_code::corrupt_file,
                                  "Invalid Hash Table Size");

    Buckets.resize(H->Capacity);

    if (auto EC = readSparseBitVector(Stream, Present))
      return EC;
    if (Present.count() != H->Size)
      return make_error<RawError>(raw_error_code::corrupt_file,
                                  "Present bit vector does not match size!");

    if (auto EC = readSparseBitVector(Stream, Deleted))
      return EC;
    if (Present.intersects(Deleted))
      return make_error<RawError>(raw_error_code::corrupt_file,
                                  "Present bit vector intersects deleted!");

    for (uint32_t P : Present) {
      if (auto EC = Stream.readInteger(Buckets[P].first))
        return EC;
      const ValueT *Value;
      if (auto EC = Stream.readObject(Value))
        return EC;
      Buckets[P].second = *Value;
    }

    return Error::success();
  }

  uint32_t calculateSerializedLength() const {
    uint32_t Size = sizeof(Header);

    constexpr int BitsPerWord = 8 * sizeof(uint32_t);

    int NumBitsP = Present.find_last() + 1;
    int NumBitsD = Deleted.find_last() + 1;

    uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
    uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;

    // Present bit set number of words (4 bytes), followed by that many actual
    // words (4 bytes each).
    Size += sizeof(uint32_t);
    Size += NumWordsP * sizeof(uint32_t);

    // Deleted bit set number of words (4 bytes), followed by that many actual
    // words (4 bytes each).
    Size += sizeof(uint32_t);
    Size += NumWordsD * sizeof(uint32_t);

    // One (Key, ValueT) pair for each entry Present.
    Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();

    return Size;
  }
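
  // Worked example of the layout above (illustrative only): for a table with
  // 2 present entries whose bits both land in word 0, and nothing deleted,
  // the total is sizeof(Header) == 8 bytes, plus 4 + 4 bytes for the Present
  // word count and its single word, plus 4 + 0 bytes for the empty Deleted
  // vector, plus 2 * (4 + sizeof(ValueT)) bytes of (key, value) records.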

  Error commit(BinaryStreamWriter &Writer) const {
    Header H;
    H.Size = size();
    H.Capacity = capacity();
    if (auto EC = Writer.writeObject(H))
      return EC;

    if (auto EC = writeSparseBitVector(Writer, Present))
      return EC;

    if (auto EC = writeSparseBitVector(Writer, Deleted))
      return EC;

    for (const auto &Entry : *this) {
      if (auto EC = Writer.writeInteger(Entry.first))
        return EC;
      if (auto EC = Writer.writeObject(Entry.second))
        return EC;
    }
    return Error::success();
  }
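
  // Round-trip sketch pairing commit() with load() above (local names are
  // illustrative; the byte-stream helpers come from
  // llvm/Support/BinaryByteStream.h):
  //
  //   std::vector<uint8_t> Data(Table.calculateSerializedLength());
  //   MutableBinaryByteStream OutStream(Data, support::little);
  //   BinaryStreamWriter Writer(OutStream);
  //   if (auto EC = Table.commit(Writer))
  //     return EC;
  //
  //   BinaryByteStream InStream(Data, support::little);
  //   BinaryStreamReader Reader(InStream);
  //   HashTable<uint32_t> Loaded;
  //   if (auto EC = Loaded.load(Reader))
  //     return EC;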

  void clear() {
    Buckets.resize(8);
    Present.clear();
    Deleted.clear();
  }

  bool empty() const { return size() == 0; }
  uint32_t capacity() const { return Buckets.size(); }
  uint32_t size() const { return Present.count(); }

  const_iterator begin() const { return const_iterator(*this); }
  const_iterator end() const { return const_iterator(*this, 0, true); }
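
  // Iteration visits (storage key, value) pairs in bucket order, skipping
  // non-present slots; commit() above relies on exactly this traversal.
  // E.g. (consume() is hypothetical):
  //
  //   for (const auto &Entry : Table)
  //     consume(Entry.first, Entry.second);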

  /// Find the entry whose key has the specified hash value, using the
  /// specified traits defining hash function and equality.
  template <typename Key, typename TraitsT>
  const_iterator find_as(const Key &K, TraitsT &Traits) const {
    uint32_t H = Traits.hashLookupKey(K) % capacity();
    uint32_t I = H;
    Optional<uint32_t> FirstUnused;
    do {
      if (isPresent(I)) {
        if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
          return const_iterator(*this, I, false);
      } else {
        if (!FirstUnused)
          FirstUnused = I;
        // Insertion occurs via linear probing from the slot hint, and will be
        // inserted at the first empty / deleted location.  Therefore, if we
        // are probing and find a location that is neither present nor
        // deleted, then nothing must have EVER been inserted at this
        // location, and thus it is not possible for a matching value to
        // occur later.
        if (!isDeleted(I))
          break;
      }
      I = (I + 1) % capacity();
    } while (I != H);

    // The only way FirstUnused would not be set is if every single entry in
    // the table were Present.  But this would violate the load factor
    // constraints that we impose, so it should never happen.
    assert(FirstUnused);
    return const_iterator(*this, *FirstUnused, true);
  }
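
  // The TraitsT parameter used throughout is constrained only by the calls
  // made in this header: hashLookupKey, storageKeyToLookupKey, and
  // lookupKeyToStorageKey.  A minimal identity-mapping sketch (illustrative,
  // not part of this header):
  //
  //   struct IdentityHashTraits {
  //     uint32_t hashLookupKey(uint32_t K) const { return K; }
  //     uint32_t storageKeyToLookupKey(uint32_t K) const { return K; }
  //     uint32_t lookupKeyToStorageKey(uint32_t K) { return K; }
  //   };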

  /// Set the entry using a key type that the specified Traits can convert
  /// from a real key to an internal key.
  template <typename Key, typename TraitsT>
  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
    return set_as_internal(K, std::move(V), Traits, None);
  }
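
  // Usage sketch with the hypothetical IdentityHashTraits above: set_as
  // returns true on insertion and false when an existing key was updated.
  //
  //   IdentityHashTraits Traits;
  //   HashTable<uint32_t> Table;
  //   bool Inserted = Table.set_as(7u, 42u, Traits); // true, new entry
  //   Inserted = Table.set_as(7u, 43u, Traits);      // false, value updated
  //   uint32_t V = Table.get(7u, Traits);            // 43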

  template <typename Key, typename TraitsT>
  ValueT get(const Key &K, TraitsT &Traits) const {
    auto Iter = find_as(K, Traits);
    assert(Iter != end());
    return (*Iter).second;
  }

protected:
  bool isPresent(uint32_t K) const { return Present.test(K); }
  bool isDeleted(uint32_t K) const { return Deleted.test(K); }

  BucketList Buckets;
  mutable SparseBitVector<> Present;
  mutable SparseBitVector<> Deleted;

private:
  /// Set the entry using a key type that the specified Traits can convert
  /// from a real key to an internal key.
  template <typename Key, typename TraitsT>
  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
                       Optional<uint32_t> InternalKey) {
    auto Entry = find_as(K, Traits);
    if (Entry != end()) {
      assert(isPresent(Entry.index()));
      assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
      // We're updating, no need to do anything special.
      Buckets[Entry.index()].second = V;
      return false;
    }

    auto &B = Buckets[Entry.index()];
    assert(!isPresent(Entry.index()));
    assert(Entry.isEnd());
    B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
    B.second = V;
    Present.set(Entry.index());
    Deleted.reset(Entry.index());

    grow(Traits);

    assert((find_as(K, Traits)) != end());
    return true;
  }

  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
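
  // E.g. maxLoad(8) == 6, so the default 8-bucket table grows as soon as a
  // 6th entry is inserted; grow() below then rebuilds into a table of
  // capacity MaxLoad * 2 (here 12).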

  template <typename TraitsT>
  void grow(TraitsT &Traits) {
    uint32_t S = size();
    uint32_t MaxLoad = maxLoad(capacity());
    if (S < MaxLoad)
      return;
    assert(capacity() != UINT32_MAX && "Can't grow Hash table!");

    uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;

    // Growing requires rebuilding the table and re-hashing every item.  Make a
    // copy with a larger capacity, insert everything into the copy, then swap
    // it in.
    HashTable NewMap(NewCapacity);
    for (auto I : Present) {
      auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
                             Buckets[I].first);
    }

    Buckets.swap(NewMap.Buckets);
    std::swap(Present, NewMap.Present);
    std::swap(Deleted, NewMap.Deleted);
    assert(capacity() == NewCapacity);
    assert(size() == S);
  }
};

} // end namespace pdb

} // end namespace llvm

#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H