xo-arena: refactor: migrate DArenaHashMap state for rehash [WIP]
This commit is contained in:
parent
6647f081ba
commit
3a10699eeb
1 changed files with 144 additions and 59 deletions
|
|
@ -156,6 +156,42 @@ namespace xo {
|
|||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename Key,
|
||||
typename Value>
|
||||
struct HashMapStore : DArenaHashMapUtil {
|
||||
public:
|
||||
using value_type = std::pair<const Key, Value>;
|
||||
|
||||
public:
|
||||
/** group_exp2: number of groups {x, 2^x} **/
|
||||
explicit HashMapStore(const std::pair<size_type,
|
||||
size_type> & group_exp2)
|
||||
: size_{0},
|
||||
n_group_exponent_{group_exp2.first},
|
||||
n_group_{group_exp2.second},
|
||||
n_slot_{group_exp2.second * c_group_size},
|
||||
control_{DArenaVector<uint8_t>::map(ArenaConfig{.size_ = n_slot_ + c_group_size})},
|
||||
slots_{DArenaVector<value_type>::map(ArenaConfig{.size_ = n_slot_ * sizeof(value_type)})}
|
||||
{}
|
||||
|
||||
public:
|
||||
/** number of pairs in this table **/
|
||||
size_type size_ = 0;
|
||||
/** base-2 logarithm of n_group_ **/
|
||||
size_type n_group_exponent_ = 0;
|
||||
/** table has capacity for this number of groups. always an exact power of two.
|
||||
* number of slots is n_group_ * c_group_size
|
||||
**/
|
||||
size_type n_group_ = (1 << n_group_exponent_);
|
||||
/** table has capacity for this number of {key,value} pairs **/
|
||||
size_type n_slot_ = n_group_ * c_group_size;
|
||||
/** control_[] partitioned into groups of c_group_size (16) consecutive elements
|
||||
**/
|
||||
DArenaVector<uint8_t> control_;
|
||||
/** slots_[] holds {key,value} pairs **/
|
||||
DArenaVector<value_type> slots_;
|
||||
};
|
||||
}
|
||||
|
||||
/** @brief flat hash map of key-value pairs using dedicated DArenas for storage
|
||||
|
|
@ -188,15 +224,15 @@ namespace xo {
|
|||
size_type hint_max_capacity = 0,
|
||||
bool debug_flag = false);
|
||||
|
||||
size_type empty() const noexcept { return size_ == 0; }
|
||||
size_type groups() const noexcept { return n_group_; }
|
||||
size_type size() const noexcept { return size_; }
|
||||
size_type capacity() const noexcept { return n_group_ * c_group_size; }
|
||||
size_type empty() const noexcept { return store_.size_ == 0; }
|
||||
size_type groups() const noexcept { return store_.n_group_; }
|
||||
size_type size() const noexcept { return store_.size_; }
|
||||
size_type capacity() const noexcept { return store_.n_group_ * c_group_size; }
|
||||
|
||||
float load_factor() const noexcept { return size_ / static_cast<float>(n_slot_); }
|
||||
float load_factor() const noexcept { return store_.size_ / static_cast<float>(store_.n_slot_); }
|
||||
|
||||
/** insert @p kv_pair into hash map. replaces any previous value
|
||||
* stored under the same key.
|
||||
/** insert @p kv_pair into hash map.
|
||||
* Replaces any previous value stored under the same key.
|
||||
*
|
||||
* Return pair retval with:
|
||||
* reval.first: true if size incremented;
|
||||
|
|
@ -207,12 +243,20 @@ namespace xo {
|
|||
**/
|
||||
std::pair<value_type *, bool> try_insert(const std::pair<const Key, Value> & kv_pair);
|
||||
|
||||
/** insert @p kv_pair into hash map.
|
||||
* Increase table size if necessary
|
||||
**/
|
||||
bool insert(const std::pair<const Key, Value> & kv_pair);
|
||||
|
||||
bool verify_ok(verify_policy p = verify_policy::throw_only()) const;
|
||||
|
||||
private:
|
||||
/** increase hash table size (invoke when max load factor reached) **/
|
||||
bool _try_grow();
|
||||
|
||||
/** load group abstraction from control bytes starting at @p ix **/
|
||||
group_type _load_group(size_type ix) {
|
||||
return group_type(&(control_[ix]));
|
||||
return group_type(&(store_.control_[ix]));
|
||||
}
|
||||
|
||||
/** like ctrl_[ix] = h2, but maintain overflow copy
|
||||
|
|
@ -225,6 +269,10 @@ namespace xo {
|
|||
key_hash hash_;
|
||||
/** key equal **/
|
||||
key_equal equal_;
|
||||
|
||||
/** hash table state contents + size-related attributes **/
|
||||
detail::HashMapStore<Key, Value> store_;
|
||||
#ifdef OBSOLETE
|
||||
/** number of pairs in this table **/
|
||||
size_type size_ = 0;
|
||||
/** base-2 logarithm of n_group_ **/
|
||||
|
|
@ -240,6 +288,7 @@ namespace xo {
|
|||
DArenaVector<uint8_t> control_;
|
||||
/** slots_[] holds {key,value} pairs **/
|
||||
DArenaVector<value_type> slots_;
|
||||
#endif
|
||||
/** true to enable debug logging **/
|
||||
bool debug_flag_ = false;
|
||||
};
|
||||
|
|
@ -262,34 +311,29 @@ namespace xo {
|
|||
bool debug_flag)
|
||||
: hash_{std::move(hash)},
|
||||
equal_{std::move(eq)},
|
||||
size_{0},
|
||||
n_group_exponent_{lub_exp2(lub_group_mult(hint_max_capacity)).first},
|
||||
n_group_{lub_exp2(lub_group_mult(hint_max_capacity)).second},
|
||||
n_slot_{n_group_ * c_group_size},
|
||||
control_{DArenaVector<uint8_t>::map(ArenaConfig{.size_ = n_slot_ + c_group_size})},
|
||||
slots_{DArenaVector<value_type>::map(ArenaConfig{.size_ = n_slot_ * sizeof(value_type)})},
|
||||
store_{lub_exp2(lub_group_mult(hint_max_capacity))},
|
||||
debug_flag_{debug_flag}
|
||||
{
|
||||
/* invariant: arenas have allocated address range, but no committed memory yet */
|
||||
this->control_.resize(n_slot_ + c_group_size);
|
||||
this->store_.control_.resize(store_.n_slot_ + c_group_size);
|
||||
|
||||
/* all slots marked empty initially */
|
||||
std::fill(this->control_.begin(), this->control_.end(), c_empty_slot);
|
||||
std::fill(this->store_.control_.begin(), this->store_.control_.end(), c_empty_slot);
|
||||
|
||||
this->slots_.resize(n_slot_);
|
||||
this->store_.slots_.resize(store_.n_slot_);
|
||||
}
|
||||
|
||||
template <typename Key, typename Value, typename Hash, typename Equal>
|
||||
void
|
||||
DArenaHashMap<Key, Value, Hash, Equal>::_update_control(size_type ix, uint8_t h2)
|
||||
{
|
||||
this->control_[ix] = h2;
|
||||
this->store_.control_[ix] = h2;
|
||||
|
||||
if (ix < c_group_size) {
|
||||
size_type N = this->capacity();
|
||||
|
||||
// refresh end-of-array copy
|
||||
std::memcpy(&(control_[N]), &(control_[0]), c_group_size);
|
||||
std::memcpy(&(store_.control_[N]), &(store_.control_[0]), c_group_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -328,9 +372,9 @@ namespace xo {
|
|||
|
||||
// invariant: slot_ix in [0 .. N)
|
||||
|
||||
auto & slot = slots_[slot_ix];
|
||||
auto & slot = store_.slots_[slot_ix];
|
||||
|
||||
if (slot.first == kv_pair.first) {
|
||||
if (equal_(slot.first, kv_pair.first)) {
|
||||
// we have match on existing key;
|
||||
// replace associated value
|
||||
slot.second = kv_pair.second;
|
||||
|
|
@ -370,14 +414,14 @@ namespace xo {
|
|||
|
||||
// invariant: slot_ix in [0 .. N)
|
||||
|
||||
auto & slot = slots_[slot_ix];
|
||||
auto & slot = store_.slots_[slot_ix];
|
||||
|
||||
// mark slot occupied in control space;
|
||||
// maintain copy-at-end for overflow
|
||||
this->_update_control(slot_ix, h2);
|
||||
new (&slot) value_type(kv_pair);
|
||||
|
||||
++(this->size_);
|
||||
++(this->store_.size_);
|
||||
|
||||
// true: increased table size
|
||||
return std::make_pair(&slot, true);
|
||||
|
|
@ -395,6 +439,46 @@ namespace xo {
|
|||
}
|
||||
}
|
||||
|
||||
template <typename Key, typename Value, typename Hash, typename Equal>
|
||||
bool
|
||||
DArenaHashMap<Key, Value, Hash, Equal>::_try_grow()
|
||||
{
|
||||
#ifdef NOT_YET
|
||||
size_type n_group_exponent_2x = n_group_exponent_ + 1;
|
||||
size_type n_group_2x = n_group_ * 2;
|
||||
size_type n_slot_2x_ = n_group_2x * c_group_size;
|
||||
|
||||
auto control_2x = DArenaVector<uint8_t>::map(ArenaConfig{.size_ = n_slot_2x_ + c_group_size});
|
||||
auto slot_2x = DArenaVector<value_type>::map(ArenaConfig{.size_ = n_slot_2x_ * sizeof(value_type));
|
||||
#endif
|
||||
|
||||
/* rehash contents -> [control_2x, slot_2x] */
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename Key, typename Value, typename Hash, typename Equal>
|
||||
bool
|
||||
DArenaHashMap<Key, Value, Hash, Equal>::insert(const std::pair<const Key, Value> & kv_pair)
|
||||
{
|
||||
auto [slot_addr, ins_flag] = this->try_insert(kv_pair);
|
||||
|
||||
if (slot_addr)
|
||||
return ins_flag;
|
||||
|
||||
assert((store_.size_ + 1) / static_cast<float>(store_.n_slot_) >= c_max_load_factor);
|
||||
|
||||
if (this->_try_grow()) {
|
||||
/* retry insert, with bigger table */
|
||||
auto [slot_addr, ins_flag] = this->try_insert(kv_pair);
|
||||
|
||||
return ins_flag;
|
||||
} else {
|
||||
// TODO: set last error. Presumeably reached max size
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify DArenaHashMap class invariants.
|
||||
*
|
||||
|
|
@ -427,53 +511,54 @@ namespace xo {
|
|||
using xo::xtag;
|
||||
|
||||
constexpr const char * c_self = "DArenaHashMap::verify_ok";
|
||||
scope log(XO_DEBUG(debug_flag_), xtag("size", size_));
|
||||
scope log(XO_DEBUG(debug_flag_),
|
||||
xtag("size", store_.size_));
|
||||
|
||||
/* SM1.1: size_ <= n_slot_ */
|
||||
if (size_ > n_slot_) {
|
||||
if (store_.size_ > store_.n_slot_) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .size <= .n_slot",
|
||||
xtag("size", size_),
|
||||
xtag("n_slot", n_slot_));
|
||||
xtag("size", store_.size_),
|
||||
xtag("n_slot", store_.n_slot_));
|
||||
}
|
||||
|
||||
/* SM1.2: control_[] size consistent with slots_[] size */
|
||||
if (control_.size() != n_slot_ + c_group_size) {
|
||||
if (store_.control_.size() != store_.n_slot_ + c_group_size) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .control_.size = .n_slot + c_group_size",
|
||||
xtag("control_.size", control_.size()),
|
||||
xtag("n_slot", n_slot_),
|
||||
xtag("control_.size", store_.control_.size()),
|
||||
xtag("n_slot", store_.n_slot_),
|
||||
xtag("c_group_size", c_group_size));
|
||||
}
|
||||
if (slots_.size() != n_slot_) {
|
||||
if (store_.slots_.size() != store_.n_slot_) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .slots_.size = .n_slot",
|
||||
xtag("slots_.size", slots_.size()),
|
||||
xtag("n_slot", n_slot_));
|
||||
xtag("slots_.size", store_.slots_.size()),
|
||||
xtag("n_slot", store_.n_slot_));
|
||||
}
|
||||
|
||||
/* SM1.3: n_group_ consistent with n_group_exponent_ */
|
||||
if (n_group_ != (size_type{1} << n_group_exponent_)) {
|
||||
if (store_.n_group_ != (size_type{1} << store_.n_group_exponent_)) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .n_group = 2^.n_group_exponent",
|
||||
xtag("n_group", n_group_),
|
||||
xtag("n_group_exponent", n_group_exponent_));
|
||||
xtag("n_group", store_.n_group_),
|
||||
xtag("n_group_exponent", store_.n_group_exponent_));
|
||||
}
|
||||
|
||||
/* SM1.4: n_slot_ consistent with n_group_ */
|
||||
if (n_slot_ != n_group_ * c_group_size) {
|
||||
if (store_.n_slot_ != store_.n_group_ * c_group_size) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .n_slot = .n_group * c_group_size",
|
||||
xtag("n_slot", n_slot_),
|
||||
xtag("n_group", n_group_),
|
||||
xtag("n_slot", store_.n_slot_),
|
||||
xtag("n_group", store_.n_group_),
|
||||
xtag("c_group_size", c_group_size));
|
||||
}
|
||||
|
||||
/* SM1.5: n_slot_ a power of 2 */
|
||||
if ((n_slot_ & (n_slot_ - 1)) != 0) {
|
||||
if ((store_.n_slot_ & (store_.n_slot_ - 1)) != 0) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect .n_slot is power of 2",
|
||||
xtag("n_slot", n_slot_));
|
||||
xtag("n_slot", store_.n_slot_));
|
||||
}
|
||||
|
||||
/* SM2.1: load_factor() <= c_max_load_factor */
|
||||
|
|
@ -486,37 +571,37 @@ namespace xo {
|
|||
|
||||
/* SM3.1: control_[N+i] = control_[i] for i in [0, c_group_size) */
|
||||
for (size_type i = 0; i < c_group_size; ++i) {
|
||||
if (control_[n_slot_ + i] != control_[i]) {
|
||||
if (store_.control_[store_.n_slot_ + i] != store_.control_[i]) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect control_[N+i] = control_[i]",
|
||||
xtag("i", i),
|
||||
xtag("control_[i]", control_[i]),
|
||||
xtag("control_[N+i]", control_[n_slot_ + i]));
|
||||
xtag("control_[i]", store_.control_[i]),
|
||||
xtag("control_[N+i]", store_.control_[store_.n_slot_ + i]));
|
||||
}
|
||||
}
|
||||
|
||||
/* SM3.2: {number of control_[i] spots with non-sentinel values} = size_ */
|
||||
{
|
||||
size_type occupied_count = 0;
|
||||
for (size_type i = 0; i < n_slot_; ++i) {
|
||||
uint8_t c = control_[i];
|
||||
for (size_type i = 0; i < store_.n_slot_; ++i) {
|
||||
uint8_t c = store_.control_[i];
|
||||
if ((c != c_empty_slot) && (c != c_tombstone)) {
|
||||
++occupied_count;
|
||||
}
|
||||
}
|
||||
if (occupied_count != size_) {
|
||||
if (occupied_count != store_.size_) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect occupied control count = size",
|
||||
xtag("occupied_count", occupied_count),
|
||||
xtag("size", size_));
|
||||
xtag("size", store_.size_));
|
||||
}
|
||||
}
|
||||
|
||||
/* SM4.1.1: if control_[i] is non-sentinel, control_[i] = hash_(slots_[i].first) & 0x7f */
|
||||
for (size_type i = 0; i < n_slot_; ++i) {
|
||||
uint8_t c = control_[i];
|
||||
for (size_type i = 0; i < store_.n_slot_; ++i) {
|
||||
uint8_t c = store_.control_[i];
|
||||
if ((c != c_empty_slot) && (c != c_tombstone)) {
|
||||
uint8_t expected_h2 = hash_(slots_[i].first) & 0x7f;
|
||||
uint8_t expected_h2 = hash_(store_.slots_[i].first) & 0x7f;
|
||||
if (c != expected_h2) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect control[i] = hash(key) & 0x7f",
|
||||
|
|
@ -530,13 +615,13 @@ namespace xo {
|
|||
/* SM4.1.2: if control_[i] is non-sentinel, all slots in range [h .. i] are non-empty,
|
||||
* where h = (hash_(slots_[i].first) >> 7) & (n_slot_ - 1)
|
||||
*/
|
||||
for (size_type i = 0; i < n_slot_; ++i) {
|
||||
uint8_t c = control_[i];
|
||||
for (size_type i = 0; i < store_.n_slot_; ++i) {
|
||||
uint8_t c = store_.control_[i];
|
||||
if ((c != c_empty_slot) && (c != c_tombstone)) {
|
||||
size_type h = (hash_(slots_[i].first) >> 7) & (n_slot_ - 1);
|
||||
size_type h = (hash_(store_.slots_[i].first) >> 7) & (store_.n_slot_ - 1);
|
||||
size_type j = h;
|
||||
while (j != i) {
|
||||
uint8_t cj = control_[j];
|
||||
uint8_t cj = store_.control_[j];
|
||||
if ((cj == c_empty_slot) || (cj == c_tombstone)) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect non-empty slot in probe range [h..i]",
|
||||
|
|
@ -545,16 +630,16 @@ namespace xo {
|
|||
xtag("j", j),
|
||||
xtag("control[j]", cj));
|
||||
}
|
||||
j = (j + 1) & (n_slot_ - 1);
|
||||
j = (j + 1) & (store_.n_slot_ - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* SM4.2: if control_[i] is empty or tombstone, slots_[i].first = key_type() */
|
||||
for (size_type i = 0; i < n_slot_; ++i) {
|
||||
uint8_t c = control_[i];
|
||||
for (size_type i = 0; i < store_.n_slot_; ++i) {
|
||||
uint8_t c = store_.control_[i];
|
||||
if ((c == c_empty_slot) || (c == c_tombstone)) {
|
||||
if (!(slots_[i].first == key_type())) {
|
||||
if (!(store_.slots_[i].first == key_type())) {
|
||||
return policy.report_error(log,
|
||||
c_self, ": expect empty/tombstone slot has default key",
|
||||
xtag("i", i),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue