From 2af28cafa94d9676f30f4bb929b8d5abd8a3460e Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Thu, 8 Jan 2026 00:36:51 -0500 Subject: [PATCH] xo-arena: DArenaHashMap: iterators + utest for them --- xo-arena/include/xo/arena/DArenaHashMap.hpp | 239 ++++++++++++++------ xo-arena/utest/DArenaHashMap.test.cpp | 42 ++++ 2 files changed, 217 insertions(+), 64 deletions(-) diff --git a/xo-arena/include/xo/arena/DArenaHashMap.hpp b/xo-arena/include/xo/arena/DArenaHashMap.hpp index 510f26a4..0c234631 100644 --- a/xo-arena/include/xo/arena/DArenaHashMap.hpp +++ b/xo-arena/include/xo/arena/DArenaHashMap.hpp @@ -162,6 +162,7 @@ namespace xo { struct HashMapStore : DArenaHashMapUtil { public: using value_type = std::pair; + using group_type = detail::Group; public: /** group_exp2: number of groups {x, 2^x} **/ @@ -175,23 +176,85 @@ namespace xo { slots_{DArenaVector::map(ArenaConfig{.size_ = n_slot_ * sizeof(value_type)})} {} + size_type empty() const noexcept { return size_ == 0; } + size_type capacity() const noexcept { return n_group_ * c_group_size; } + float load_factor() const noexcept { return size_ / static_cast(n_slot_); } + + group_type _load_group(size_type ix) { + return group_type(&(control_[ix])); + } + + void _update_control(size_type ix, uint8_t h2) { + this->control_[ix] = h2; + + if (ix < c_group_size) { + size_type N = this->capacity(); + + // refresh end-of-array copy + std::memcpy(&(control_[N]), &(control_[0]), c_group_size); + } + } + public: /** number of pairs in this table **/ size_type size_ = 0; /** base-2 logarithm of n_group_ **/ size_type n_group_exponent_ = 0; - /** table has capacity for this number of groups. always an exact power of two. + /** table has capacity for this number of groups. + * always an exact power of two. * number of slots is n_group_ * c_group_size **/ size_type n_group_ = (1 << n_group_exponent_); /** table has capacity for this number of {key,value} pairs **/ size_type n_slot_ = n_group_ * c_group_size; - /** control_[] partitioned into groups of c_group_size (16) consecutive elements + /** control_[] partitioned into groups of + * c_group_size (16) consecutive elements **/ DArenaVector control_; /** slots_[] holds {key,value} pairs **/ DArenaVector slots_; }; + + template + struct DArenaHashMapIterator : public DArenaHashMapUtil { + using value_type = std::pair; + + public: + DArenaHashMapIterator(uint8_t * c, uint8_t * e, value_type * p) + : ctrl_{c}, ctrl_end_{e}, pos_{p} {} + + value_type & operator*() const { return *pos_; } + value_type * operator->() const { return pos_; } + + bool operator==(const DArenaHashMapIterator & x) const { + return this->pos_ == x.pos_; + } + + bool operator!=(const DArenaHashMapIterator & x) const { + return this->pos_ != x.pos_; + } + + DArenaHashMapIterator & operator++() { + do { + ++ctrl_; + ++pos_; + } while ((ctrl_ != ctrl_end_) && this->is_sentinel()); + + return *this; + } + + bool is_sentinel() const { + return ((*ctrl_ == c_tombstone) || (*ctrl_ == c_empty_slot)); + } + + private: + uint8_t * ctrl_ = nullptr; + uint8_t * ctrl_end_ = nullptr; + + value_type * pos_ = nullptr; + }; + } /** @brief flat hash map of key-value pairs using dedicated DArenas for storage @@ -215,6 +278,9 @@ namespace xo { using key_equal = Equal; using byte = std::byte; using group_type = detail::Group; + using store_type = detail::HashMapStore; + using insert_value_type = std::pair; + using iterator = detail::DArenaHashMapIterator; /** create hash map **/ DArenaHashMap(size_type hint_max_capacity, @@ -224,12 +290,32 @@ namespace xo { size_type hint_max_capacity = 0, bool debug_flag = false); - size_type empty() const noexcept { return store_.size_ == 0; } + size_type empty() const noexcept { return store_.empty(); } size_type groups() const noexcept { return store_.n_group_; } size_type size() const noexcept { return store_.size_; } - size_type capacity() const noexcept { return store_.n_group_ * c_group_size; } + size_type capacity() const noexcept { return store_.capacity(); } + float load_factor() const noexcept { return store_.load_factor(); } - float load_factor() const noexcept { return store_.size_ / static_cast(store_.n_slot_); } + iterator begin() { + iterator ix(&(store_.control_[0]), + &(store_.control_[store_.capacity()]), + &(store_.slots_[0])); + + if (ix.is_sentinel()) { + /* first occupied position in table */ + ++ix; + } + + return ix; + } + + iterator end() { + iterator ix(&(store_.control_[store_.capacity()]), + &(store_.control_[store_.capacity()]), + &(store_.slots_[store_.capacity()])); + + return ix; + } /** insert @p kv_pair into hash map. * Replaces any previous value stored under the same key. @@ -241,54 +327,43 @@ namespace xo { * When table is full retval.second will be nullptr, * with error captured in last_error_ **/ - std::pair try_insert(const std::pair & kv_pair); + insert_value_type try_insert(const value_type & kv_pair); /** insert @p kv_pair into hash map. * Increase table size if necessary **/ - bool insert(const std::pair & kv_pair); + bool insert(const value_type & kv_pair); bool verify_ok(verify_policy p = verify_policy::throw_only()) const; private: + /** insert @p kv_pair, + * where key hashes to @p hash_value, into @p *store + **/ + insert_value_type _try_insert_aux(size_type hash_value, + const value_type & kv_pair, + store_type * p_store); + /** increase hash table size (invoke when max load factor reached) **/ bool _try_grow(); /** load group abstraction from control bytes starting at @p ix **/ - group_type _load_group(size_type ix) { - return group_type(&(store_.control_[ix])); - } + group_type _load_group(size_type ix) { return store_._load_group(ix); } /** like ctrl_[ix] = h2, but maintain overflow copy * at end of ctrl_[] array **/ - void _update_control(size_type ix, uint8_t h2); + void _update_control(size_type ix, uint8_t h2) { + return store_._update_control(ix, h2); + } private: /** hash function **/ key_hash hash_; /** key equal **/ key_equal equal_; - /** hash table state contents + size-related attributes **/ - detail::HashMapStore store_; -#ifdef OBSOLETE - /** number of pairs in this table **/ - size_type size_ = 0; - /** base-2 logarithm of n_group_ **/ - size_type n_group_exponent_ = 0; - /** table has capacity for this number of groups. always an exact power of two. - * number of slots is n_group_ * c_group_size - **/ - size_type n_group_ = (1 << n_group_exponent_); - /** table has capacity for this number of {key,value} pairs **/ - size_type n_slot_ = n_group_ * c_group_size; - /** control_[] partitioned into groups of c_group_size (16) consecutive elements - **/ - DArenaVector control_; - /** slots_[] holds {key,value} pairs **/ - DArenaVector slots_; -#endif + store_type store_; /** true to enable debug logging **/ bool debug_flag_ = false; }; @@ -318,36 +393,46 @@ namespace xo { this->store_.control_.resize(store_.n_slot_ + c_group_size); /* all slots marked empty initially */ - std::fill(this->store_.control_.begin(), this->store_.control_.end(), c_empty_slot); + std::fill(this->store_.control_.begin(), + this->store_.control_.end(), + c_empty_slot); this->store_.slots_.resize(store_.n_slot_); } - template - void - DArenaHashMap::_update_control(size_type ix, uint8_t h2) - { - this->store_.control_[ix] = h2; - - if (ix < c_group_size) { - size_type N = this->capacity(); - - // refresh end-of-array copy - std::memcpy(&(store_.control_[N]), &(store_.control_[0]), c_group_size); - } - } - template auto - DArenaHashMap::try_insert(const std::pair & kv_pair) -> std::pair + DArenaHashMap::try_insert(const value_type & kv_pair) -> insert_value_type { size_type h = hash_(kv_pair.first); + + return _try_insert_aux(h, kv_pair, &store_); + } + + template + auto + DArenaHashMap::_try_insert_aux(size_type hash_value, + const std::pair & kv_pair, + store_type * p_store) + -> std::pair + + { + size_type h = hash_value; // h1: hi bits: probe sequence size_type h1 = h >> 7; // h2: lo bits: store in control byte uint8_t h2 = h & 0x7f; - size_type N = this->capacity(); + size_type N = p_store->capacity(); // same as: // ix = h1 % N @@ -356,7 +441,7 @@ namespace xo { // will make series of probes for (;;) { - auto grp = _load_group(ix); + auto grp = p_store->_load_group(ix); { // look for matching slot to update @@ -372,7 +457,7 @@ namespace xo { // invariant: slot_ix in [0 .. N) - auto & slot = store_.slots_[slot_ix]; + auto & slot = p_store->slots_[slot_ix]; if (equal_(slot.first, kv_pair.first)) { // we have match on existing key; @@ -404,7 +489,7 @@ namespace xo { // Check here so that table can stay at max load factor // indefinitely as long as updates only // - if (load_factor() >= c_max_load_factor) { + if (p_store->load_factor() >= c_max_load_factor) { return std::make_pair(nullptr, false); } @@ -414,14 +499,14 @@ namespace xo { // invariant: slot_ix in [0 .. N) - auto & slot = store_.slots_[slot_ix]; + auto & slot = p_store->slots_[slot_ix]; // mark slot occupied in control space; // maintain copy-at-end for overflow - this->_update_control(slot_ix, h2); + p_store->_update_control(slot_ix, h2); new (&slot) value_type(kv_pair); - ++(this->store_.size_); + ++(p_store->size_); // true: increased table size return std::make_pair(&slot, true); @@ -443,23 +528,49 @@ namespace xo { bool DArenaHashMap::_try_grow() { -#ifdef NOT_YET - size_type n_group_exponent_2x = n_group_exponent_ + 1; - size_type n_group_2x = n_group_ * 2; - size_type n_slot_2x_ = n_group_2x * c_group_size; + size_type n_group_exponent_2x = store_.n_group_exponent_ + 1; + size_type n_group_2x = store_.n_group_ * 2; - auto control_2x = DArenaVector::map(ArenaConfig{.size_ = n_slot_2x_ + c_group_size}); - auto slot_2x = DArenaVector::map(ArenaConfig{.size_ = n_slot_2x_ * sizeof(value_type)); -#endif + detail::HashMapStore store_2x(std::make_pair(n_group_exponent_2x, + n_group_2x)); + /* rehash everything in store_, + * into store_2x + */ - /* rehash contents -> [control_2x, slot_2x] */ + for (size_type i = 0, n = store_.capacity(); i < n; ++i) { + uint8_t ctrl = store_.control_[i]; + value_type & kv_pair = store_.slots_[i]; - return false; + if ((ctrl != c_empty_slot) + && (ctrl != c_tombstone)) + { + size_type h = hash_(kv_pair.first); + auto chk = this->_try_insert_aux(h, kv_pair, &store_2x); + + if (!chk.second) { + // shenanigans - something isn't right. + // - may have run out of memory + assert(false); + + return false; + } + } + } + + this->store_ = std::move(store_2x); + + return true; } - template + template bool - DArenaHashMap::insert(const std::pair & kv_pair) + DArenaHashMap::insert(const std::pair & kv_pair) { auto [slot_addr, ins_flag] = this->try_insert(kv_pair); diff --git a/xo-arena/utest/DArenaHashMap.test.cpp b/xo-arena/utest/DArenaHashMap.test.cpp index 3f4a24fd..58d75ffb 100644 --- a/xo-arena/utest/DArenaHashMap.test.cpp +++ b/xo-arena/utest/DArenaHashMap.test.cpp @@ -65,6 +65,17 @@ namespace xo { REQUIRE(map.groups() == 1); REQUIRE(map.capacity() == DArenaHashMapUtil::c_group_size); REQUIRE(map.load_factor() == 1/16.0); + + /* verify iteration */ + { + size_t n = 0; + for (auto & ix : map) { + REQUIRE(ix.first == 1); + REQUIRE(ix.second == 11); + ++n; + } + REQUIRE(n == map.size()); + } } { @@ -77,6 +88,15 @@ namespace xo { REQUIRE(map.groups() == 1); REQUIRE(map.capacity() == DArenaHashMapUtil::c_group_size); REQUIRE(map.load_factor() == 2/16.0); + + /* verify iteration */ + { + size_t n = 0; + for (auto & ix : map) { + ++n; + } + REQUIRE(n == map.size()); + } } { @@ -89,6 +109,28 @@ namespace xo { REQUIRE(map.groups() == 1); REQUIRE(map.capacity() == DArenaHashMapUtil::c_group_size); REQUIRE(map.load_factor() == 3/16.0); + + /* verify iteration */ + { + size_t n = 0; + for (auto & ix : map) { + switch (ix.first) { + case 1: + REQUIRE(ix.second == 11); + break; + case 2: + REQUIRE(ix.second == 9); + break; + case 259: + REQUIRE(ix.second == 12); + break; + default: + REQUIRE(false); + } + ++n; + } + REQUIRE(n == map.size()); + } } }