From 344ced1159432a82aae8b143192ee74cfa13a550 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Wed, 7 Jan 2026 19:07:29 -0500 Subject: [PATCH] xo-arena: refactor: migrate DArenaHashMap state for rehash [WIP] --- include/xo/arena/DArenaHashMap.hpp | 203 ++++++++++++++++++++--------- 1 file changed, 144 insertions(+), 59 deletions(-) diff --git a/include/xo/arena/DArenaHashMap.hpp b/include/xo/arena/DArenaHashMap.hpp index c003dc0..510f26a 100644 --- a/include/xo/arena/DArenaHashMap.hpp +++ b/include/xo/arena/DArenaHashMap.hpp @@ -156,6 +156,42 @@ namespace xo { } #endif }; + + template + struct HashMapStore : DArenaHashMapUtil { + public: + using value_type = std::pair; + + public: + /** @p group_exp2: pair {x, 2^x}: base-2 exponent x and the group count 2^x **/ + explicit HashMapStore(const std::pair & group_exp2) + : size_{0}, + n_group_exponent_{group_exp2.first}, + n_group_{group_exp2.second}, + n_slot_{group_exp2.second * c_group_size}, + control_{DArenaVector::map(ArenaConfig{.size_ = n_slot_ + c_group_size})}, + slots_{DArenaVector::map(ArenaConfig{.size_ = n_slot_ * sizeof(value_type)})} + {} + + public: + /** number of pairs in this table **/ + size_type size_ = 0; + /** base-2 logarithm of n_group_ **/ + size_type n_group_exponent_ = 0; + /** table has capacity for this number of groups. always an exact power of two.
+ * number of slots is n_group_ * c_group_size + **/ + size_type n_group_ = (size_type{1} << n_group_exponent_); + /** table has capacity for this number of {key,value} pairs **/ + size_type n_slot_ = n_group_ * c_group_size; + /** control_[] partitioned into groups of c_group_size (16) consecutive elements, + * followed by c_group_size bytes mirroring control_[0 .. c_group_size) **/ + DArenaVector control_; + /** slots_[] holds {key,value} pairs **/ + DArenaVector slots_; + }; } /** @brief flat hash map of key-value pairs using dedicated DArenas for storage @@ -188,15 +224,15 @@ namespace xo { size_type hint_max_capacity = 0, bool debug_flag = false); - size_type empty() const noexcept { return size_ == 0; } - size_type groups() const noexcept { return n_group_; } - size_type size() const noexcept { return size_; } - size_type capacity() const noexcept { return n_group_ * c_group_size; } + size_type empty() const noexcept { return store_.size_ == 0; } + size_type groups() const noexcept { return store_.n_group_; } + size_type size() const noexcept { return store_.size_; } + size_type capacity() const noexcept { return store_.n_group_ * c_group_size; } - float load_factor() const noexcept { return size_ / static_cast(n_slot_); } + float load_factor() const noexcept { return store_.size_ / static_cast(store_.n_slot_); } - /** insert @p kv_pair into hash map. replaces any previous value - * stored under the same key. + /** insert @p kv_pair into hash map. + * Replaces any previous value stored under the same key. * * Return pair retval with: * reval.first: true if size incremented; @@ -207,12 +243,20 @@ namespace xo { **/ std::pair try_insert(const std::pair & kv_pair); + /** insert @p kv_pair into hash map.
+ * Increase table size if necessary. Returns true if a new pair was stored; + * false otherwise (including when the table cannot grow) **/ + bool insert(const std::pair & kv_pair); + bool verify_ok(verify_policy p = verify_policy::throw_only()) const; private: + /** increase hash table size (invoke when max load factor reached); true on success **/ + bool _try_grow(); + /** load group abstraction from control bytes starting at @p ix **/ group_type _load_group(size_type ix) { - return group_type(&(control_[ix])); + return group_type(&(store_.control_[ix])); } /** like ctrl_[ix] = h2, but maintain overflow copy @@ -225,6 +269,10 @@ namespace xo { key_hash hash_; /** key equal **/ key_equal equal_; + + /** hash table state contents + size-related attributes **/ + detail::HashMapStore store_; +#ifdef OBSOLETE /** number of pairs in this table **/ size_type size_ = 0; /** base-2 logarithm of n_group_ **/ @@ -240,6 +288,7 @@ namespace xo { DArenaVector control_; /** slots_[] holds {key,value} pairs **/ DArenaVector slots_; +#endif /** true to enable debug logging **/ bool debug_flag_ = false; }; @@ -262,34 +311,29 @@ namespace xo { bool debug_flag) : hash_{std::move(hash)}, equal_{std::move(eq)}, - size_{0}, - n_group_exponent_{lub_exp2(lub_group_mult(hint_max_capacity)).first}, - n_group_{lub_exp2(lub_group_mult(hint_max_capacity)).second}, - n_slot_{n_group_ * c_group_size}, - control_{DArenaVector::map(ArenaConfig{.size_ = n_slot_ + c_group_size})}, - slots_{DArenaVector::map(ArenaConfig{.size_ = n_slot_ * sizeof(value_type)})}, + store_{lub_exp2(lub_group_mult(hint_max_capacity))}, debug_flag_{debug_flag} { /* invariant: arenas have allocated address range, but no committed memory yet */ - this->control_.resize(n_slot_ + c_group_size); + this->store_.control_.resize(store_.n_slot_ + c_group_size); /* all slots marked empty initially */ - std::fill(this->control_.begin(), this->control_.end(), c_empty_slot); + std::fill(this->store_.control_.begin(), this->store_.control_.end(), c_empty_slot); - this->slots_.resize(n_slot_); + this->store_.slots_.resize(store_.n_slot_); }
template void DArenaHashMap::_update_control(size_type ix, uint8_t h2) { - this->control_[ix] = h2; + this->store_.control_[ix] = h2; if (ix < c_group_size) { size_type N = this->capacity(); // refresh end-of-array copy - std::memcpy(&(control_[N]), &(control_[0]), c_group_size); + std::memcpy(&(store_.control_[N]), &(store_.control_[0]), c_group_size); } } @@ -328,9 +372,9 @@ namespace xo { // invariant: slot_ix in [0 .. N) - auto & slot = slots_[slot_ix]; + auto & slot = store_.slots_[slot_ix]; - if (slot.first == kv_pair.first) { + if (equal_(slot.first, kv_pair.first)) { // we have match on existing key; // replace associated value slot.second = kv_pair.second; @@ -370,14 +414,14 @@ namespace xo { // invariant: slot_ix in [0 .. N) - auto & slot = slots_[slot_ix]; + auto & slot = store_.slots_[slot_ix]; // mark slot occupied in control space; // maintain copy-at-end for overflow this->_update_control(slot_ix, h2); new (&slot) value_type(kv_pair); - ++(this->size_); + ++(this->store_.size_); // true: increased table size return std::make_pair(&slot, true); @@ -395,6 +439,46 @@ namespace xo { } } + template + bool + DArenaHashMap::_try_grow() + { +#ifdef NOT_YET + size_type n_group_exponent_2x = store_.n_group_exponent_ + 1; + size_type n_group_2x = store_.n_group_ * 2; + size_type n_slot_2x = n_group_2x * c_group_size; + + auto control_2x = DArenaVector::map(ArenaConfig{.size_ = n_slot_2x + c_group_size}); + auto slot_2x = DArenaVector::map(ArenaConfig{.size_ = n_slot_2x * sizeof(value_type)}); +#endif + + /* rehash contents -> [control_2x, slot_2x] */ + /* WIP: growth is not implemented yet, so always report failure */ + return false; + } + /** insert @p kv_pair; when try_insert() cannot place it, grow the table and retry once **/ + template + bool + DArenaHashMap::insert(const std::pair & kv_pair) + { + auto [slot_addr, ins_flag] = this->try_insert(kv_pair); + /* non-null slot address: try_insert() placed the pair */ + if (slot_addr) + return ins_flag; + /* otherwise expect the table to be at its maximum load factor */ + assert((store_.size_ + 1) / static_cast(store_.n_slot_) >= c_max_load_factor); + + if (this->_try_grow()) { + /* retry insert, with bigger table */ + auto [retry_addr, retry_flag] = this->try_insert(kv_pair); + + return retry_addr && retry_flag; + }
+ else { + // TODO: set last error. Presumably reached max size + return false; + } + } + /** * Verify DArenaHashMap class invariants. * @@ -427,53 +511,54 @@ namespace xo { using xo::xtag; constexpr const char * c_self = "DArenaHashMap::verify_ok"; - scope log(XO_DEBUG(debug_flag_), xtag("size", size_)); + scope log(XO_DEBUG(debug_flag_), + xtag("size", store_.size_)); /* SM1.1: size_ <= n_slot_ */ - if (size_ > n_slot_) { + if (store_.size_ > store_.n_slot_) { return policy.report_error(log, c_self, ": expect .size <= .n_slot", - xtag("size", size_), - xtag("n_slot", n_slot_)); + xtag("size", store_.size_), + xtag("n_slot", store_.n_slot_)); } /* SM1.2: control_[] size consistent with slots_[] size */ - if (control_.size() != n_slot_ + c_group_size) { + if (store_.control_.size() != store_.n_slot_ + c_group_size) { return policy.report_error(log, c_self, ": expect .control_.size = .n_slot + c_group_size", - xtag("control_.size", control_.size()), - xtag("n_slot", n_slot_), + xtag("control_.size", store_.control_.size()), + xtag("n_slot", store_.n_slot_), xtag("c_group_size", c_group_size)); } - if (slots_.size() != n_slot_) { + if (store_.slots_.size() != store_.n_slot_) { return policy.report_error(log, c_self, ": expect .slots_.size = .n_slot", - xtag("slots_.size", slots_.size()), - xtag("n_slot", n_slot_)); + xtag("slots_.size", store_.slots_.size()), + xtag("n_slot", store_.n_slot_)); } /* SM1.3: n_group_ consistent with n_group_exponent_ */ - if (n_group_ != (size_type{1} << n_group_exponent_)) { + if (store_.n_group_ != (size_type{1} << store_.n_group_exponent_)) { return policy.report_error(log, c_self, ": expect .n_group = 2^.n_group_exponent", - xtag("n_group", n_group_), - xtag("n_group_exponent", n_group_exponent_)); + xtag("n_group", store_.n_group_), + xtag("n_group_exponent", store_.n_group_exponent_)); } /* SM1.4: n_slot_ consistent with n_group_ */ - if (n_slot_ != n_group_ * c_group_size) { + if (store_.n_slot_ != store_.n_group_ * c_group_size)
{ return policy.report_error(log, c_self, ": expect .n_slot = .n_group * c_group_size", - xtag("n_slot", n_slot_), - xtag("n_group", n_group_), + xtag("n_slot", store_.n_slot_), + xtag("n_group", store_.n_group_), xtag("c_group_size", c_group_size)); } /* SM1.5: n_slot_ a power of 2 */ - if ((n_slot_ & (n_slot_ - 1)) != 0) { + if ((store_.n_slot_ & (store_.n_slot_ - 1)) != 0) { return policy.report_error(log, c_self, ": expect .n_slot is power of 2", - xtag("n_slot", n_slot_)); + xtag("n_slot", store_.n_slot_)); } /* SM2.1: load_factor() <= c_max_load_factor */ @@ -486,37 +571,37 @@ namespace xo { /* SM3.1: control_[N+i] = control_[i] for i in [0, c_group_size) */ for (size_type i = 0; i < c_group_size; ++i) { - if (control_[n_slot_ + i] != control_[i]) { + if (store_.control_[store_.n_slot_ + i] != store_.control_[i]) { return policy.report_error(log, c_self, ": expect control_[N+i] = control_[i]", xtag("i", i), - xtag("control_[i]", control_[i]), - xtag("control_[N+i]", control_[n_slot_ + i])); + xtag("control_[i]", store_.control_[i]), + xtag("control_[N+i]", store_.control_[store_.n_slot_ + i])); } } /* SM3.2: {number of control_[i] spots with non-sentinel values} = size_ */ { size_type occupied_count = 0; - for (size_type i = 0; i < n_slot_; ++i) { - uint8_t c = control_[i]; + for (size_type i = 0; i < store_.n_slot_; ++i) { + uint8_t c = store_.control_[i]; if ((c != c_empty_slot) && (c != c_tombstone)) { ++occupied_count; } } - if (occupied_count != size_) { + if (occupied_count != store_.size_) { return policy.report_error(log, c_self, ": expect occupied control count = size", xtag("occupied_count", occupied_count), - xtag("size", size_)); + xtag("size", store_.size_)); } } /* SM4.1.1: if control_[i] is non-sentinel, control_[i] = hash_(slots_[i].first) & 0x7f */ - for (size_type i = 0; i < n_slot_; ++i) { - uint8_t c = control_[i]; + for (size_type i = 0; i < store_.n_slot_; ++i) { + uint8_t c = store_.control_[i]; if ((c != c_empty_slot) && (c != 
c_tombstone)) { - uint8_t expected_h2 = hash_(slots_[i].first) & 0x7f; + uint8_t expected_h2 = hash_(store_.slots_[i].first) & 0x7f; if (c != expected_h2) { return policy.report_error(log, c_self, ": expect control[i] = hash(key) & 0x7f", @@ -530,13 +615,13 @@ namespace xo { /* SM4.1.2: if control_[i] is non-sentinel, all slots in range [h .. i] are non-empty, * where h = (hash_(slots_[i].first) >> 7) & (n_slot_ - 1) */ - for (size_type i = 0; i < n_slot_; ++i) { - uint8_t c = control_[i]; + for (size_type i = 0; i < store_.n_slot_; ++i) { + uint8_t c = store_.control_[i]; if ((c != c_empty_slot) && (c != c_tombstone)) { - size_type h = (hash_(slots_[i].first) >> 7) & (n_slot_ - 1); + size_type h = (hash_(store_.slots_[i].first) >> 7) & (store_.n_slot_ - 1); size_type j = h; while (j != i) { - uint8_t cj = control_[j]; + uint8_t cj = store_.control_[j]; if ((cj == c_empty_slot) || (cj == c_tombstone)) { return policy.report_error(log, c_self, ": expect non-empty slot in probe range [h..i]", @@ -545,16 +630,16 @@ namespace xo { xtag("j", j), xtag("control[j]", cj)); } - j = (j + 1) & (n_slot_ - 1); + j = (j + 1) & (store_.n_slot_ - 1); } } } /* SM4.2: if control_[i] is empty or tombstone, slots_[i].first = key_type() */ - for (size_type i = 0; i < n_slot_; ++i) { - uint8_t c = control_[i]; + for (size_type i = 0; i < store_.n_slot_; ++i) { + uint8_t c = store_.control_[i]; if ((c == c_empty_slot) || (c == c_tombstone)) { - if (!(slots_[i].first == key_type())) { + if (!(store_.slots_[i].first == key_type())) { return policy.report_error(log, c_self, ": expect empty/tombstone slot has default key", xtag("i", i),