diff --git a/xo-ordinaltree/.github/workflows/main.yml b/xo-ordinaltree/.github/workflows/main.yml new file mode 100644 index 00000000..14fe1134 --- /dev/null +++ b/xo-ordinaltree/.github/workflows/main.yml @@ -0,0 +1,122 @@ + +name: build xo-ordinaltree + xo dependencies + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + +jobs: + build: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + steps: + - name: checkout source + uses: actions/checkout@v3 + + - name: Install catch2 + # install catch2. see [[https://stackoverflow.com/questions/57982945/how-to-apt-get-install-in-a-github-actions-workflow]] + run: sudo apt-get install -y catch2 + + - name: Install libbsd-dev + # provides arc4random_buf in randomgen + run: sudo apt-get install -y libbsd-dev + + # ---------------------------------------------------------------- + + - name: Clone xo-cmake + uses: actions/checkout@v3 + with: + repository: Rconybea/xo-cmake + path: repo/xo-cmake + + - name: Configure xo-cmake + run: cmake -B ${{github.workspace}}/build_xo-cmake -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local repo/xo-cmake + + - name: Build xo-cmake (trivial) + run: cmake --build ${{github.workspace}}/build_xo-cmake --config ${{env.BUILD_TYPE}} + + - name: Install xo-cmake + run: cmake --install ${{github.workspace}}/build_xo-cmake + + # ---------------------------------------------------------------- + + - name: Clone indentlog + uses: actions/checkout@v3 + with: + repository: Rconybea/indentlog + path: repo/indentlog + + - name: Configure indentlog + # configure cmake for 
indentlog in dedicated build directory. + run: cmake -B ${{github.workspace}}/build_indentlog -DCMAKE_MODULE_PATH=${{github.workspace}}/local/share/cmake -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local repo/indentlog + + - name: Build indentlog + run: cmake --build ${{github.workspace}}/build_indentlog --config ${{env.BUILD_TYPE}} + + - name: Install indentlog + # install into ${{github.workspace}}/local + run: cmake --install ${{github.workspace}}/build_indentlog + + # ---------------------------------------------------------------- + + - name: Clone randomgen + uses: actions/checkout@v3 + with: + repository: Rconybea/randomgen + path: repo/randomgen + + - name: Configure randomgen + # configure cmake for randomgen in dedicated build directory. + run: cmake -B ${{github.workspace}}/build_randomgen -DCMAKE_MODULE_PATH=${{github.workspace}}/local/share/cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/local -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local repo/randomgen + + - name: Build randomgen + run: cmake --build ${{github.workspace}}/build_randomgen --config ${{env.BUILD_TYPE}} + + - name: Install randomgen + # install into ${{github.workspace}}/local + run: cmake --install ${{github.workspace}}/build_randomgen + + # ---------------------------------------------------------------- + + - name: Clone refcnt + uses: actions/checkout@v3 + with: + repository: Rconybea/refcnt + path: repo/refcnt + + - name: Configure refcnt + # configure cmake for refcnt in dedicated build directory. 
+ run: cmake -B ${{github.workspace}}/build_refcnt -DCMAKE_MODULE_PATH=${{github.workspace}}/local/share/cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/local -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local repo/refcnt + + - name: Build refcnt + run: cmake --build ${{github.workspace}}/build_refcnt --config ${{env.BUILD_TYPE}} + + - name: Install refcnt + # install into ${{github.workspace}}/local + run: cmake --install ${{github.workspace}}/build_refcnt + + # ---------------------------------------------------------------- + + - name: Configure self (xo-ordinaltree) + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build_ordinaltree -DCMAKE_MODULE_PATH=${{github.workspace}}/local/share/cmake -DCMAKE_PREFIX_PATH=${{github.workspace}}/local -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build self (xo-ordinaltree) + # Build your program with the given configuration + run: cmake --build ${{github.workspace}}/build_ordinaltree --config ${{env.BUILD_TYPE}} + + - name: Test self (xo-ordinaltree) + working-directory: ${{github.workspace}}/build_ordinaltree + # Execute tests defined by the CMake configuration. 
+ # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${{env.BUILD_TYPE}} diff --git a/xo-ordinaltree/.gitignore b/xo-ordinaltree/.gitignore new file mode 100644 index 00000000..13c0afb7 --- /dev/null +++ b/xo-ordinaltree/.gitignore @@ -0,0 +1,6 @@ +# clangd working space (see emacs+lsp) +.cache +# typical cmake build directory (source-tree-nephew) +.build* +# symlink to builddir/compile_commands.json; should be set manually in dev sandbox +compile_commands.json diff --git a/xo-ordinaltree/CMakeLists.txt b/xo-ordinaltree/CMakeLists.txt new file mode 100644 index 00000000..74ba5859 --- /dev/null +++ b/xo-ordinaltree/CMakeLists.txt @@ -0,0 +1,43 @@ +# xo-ordinaltree/CMakeLists.txt + +cmake_minimum_required(VERSION 3.10) + +project(xo_ordinaltree VERSION 0.1) +enable_language(CXX) + +# common XO macros (see github:Rconybea/xo-cmake) +include(GNUInstallDirs) +include(cmake/xo-bootstrap-macros.cmake) + +xo_cxx_toplevel_options3() + +# ---------------------------------------------------------------- +# c++ settings + +set(PROJECT_CXX_FLAGS "") +add_definitions(${PROJECT_CXX_FLAGS}) + +# ---------------------------------------------------------------- +# output targets + +add_subdirectory(utest) + +# ---------------------------------------------------------------- +# header-only library + +set(SELF_LIB xo_ordinaltree) +xo_add_headeronly_library(${SELF_LIB}) + +# ---------------------------------------------------------------- +# +xo_install_library4(${SELF_LIB} ${PROJECT_NAME}Targets) +# (note: ..Targets from xo_install_library2()) +xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets) + +# ---------------------------------------------------------------- +# input dependencies + +# NOTE: dependency set here must be kept consistent with ordinaltree/cmake/xo_ordinaltreeConfig.cmake.in + +# xo-ordinaltree is also header-only +xo_headeronly_dependency(${SELF_LIB} randomgen) diff --git 
a/xo-ordinaltree/README.md b/xo-ordinaltree/README.md new file mode 100644 index 00000000..9adda0d5 --- /dev/null +++ b/xo-ordinaltree/README.md @@ -0,0 +1,33 @@ +# ordinal tree library + +## Getting Started + +### build + install dependencies + +- see [github/Rconybea/randomgen](https://github.com/Rconybea/randomgen) -- random number generators e.g. xoshiro256ss +- see [github/Rconybea/refcnt](https://github.com/Rconybea/refcnt) -- intrusive reference-counting + +### build + install +``` +$ cd xo-ordinaltree +$ mkdir build +$ cd build +$ INSTALL_PREFIX=/usr/local # or wherever you prefer +$ cmake -DCMAKE_MODULE_PATH=${INSTALL_PREFIX}/share/cmake -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} .. +$ make +$ make install +``` + +### build for unit test coverage +``` +$ cd xo-ordinaltree +$ mkdir build-ccov +$ cd build-ccov +$ cmake -DCMAKE_MODULE_PATH=${INSTALL_PREFIX}/share/cmake -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCODE_COVERAGE=ON -DCMAKE_BUILD_TYPE=Debug .. +``` + +### LSP support +``` +$ cd xo-ordinaltree +$ ln -s build/compile_commands.json # lsp will look for compile_commands.json in the root of the source tree +``` diff --git a/xo-ordinaltree/cmake/xo-bootstrap-macros.cmake b/xo-ordinaltree/cmake/xo-bootstrap-macros.cmake new file mode 100644 index 00000000..96592216 --- /dev/null +++ b/xo-ordinaltree/cmake/xo-bootstrap-macros.cmake @@ -0,0 +1,14 @@ +if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL "prefix")) + # default to typical install location for xo-project-macros + set(CMAKE_MODULE_PATH ${CMAKE_INSTALL_PREFIX}/share/cmake) +endif() + +if (NOT XO_SUBMODULE_BUILD) + message("-- CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}") + message("-- CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") +endif() + +# needs to have been installed somewhere on CMAKE_MODULE_PATH, +# (e.g. 
from xo-cmake with the same value for CMAKE_INSTALL_PREFIX) +# +include(xo_macros/xo-project-macros) diff --git a/xo-ordinaltree/cmake/xo_ordinaltreeConfig.cmake.in b/xo-ordinaltree/cmake/xo_ordinaltreeConfig.cmake.in new file mode 100644 index 00000000..7e308d14 --- /dev/null +++ b/xo-ordinaltree/cmake/xo_ordinaltreeConfig.cmake.in @@ -0,0 +1,6 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(randomgen) +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") +check_required_components("@PROJECT_NAME@") diff --git a/xo-ordinaltree/include/xo/ordinaltree/BplusTree.hpp b/xo-ordinaltree/include/xo/ordinaltree/BplusTree.hpp new file mode 100644 index 00000000..7810a031 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/BplusTree.hpp @@ -0,0 +1,1799 @@ +/* @file BplusTree.hpp */ + +/* provides B+ tree with order statistics */ + +/* NOTES: + * - expect optimimum node size to be OS page size. + * + */ + +#pragma once + +//#include "bplustree/BplusTreeNode.hpp" +#include "bplustree/LeafNode.hpp" +#include "bplustree/InternalNode.hpp" +#include "bplustree/Iterator.hpp" +#include "bplustree/Lhs.hpp" +#include "bplustree/bplustree_tags.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/tag.hpp" +#include "xo/indentlog/print/pad.hpp" +#include /* for std::unqiue_ptr */ +#include /* for std::max */ +#include /* for std::numeric_limits */ +#include +#include +#include +#if __APPLE__ && __MACH__ +# include +#endif + +namespace xo { + namespace tree { + /* + * +-------------+ + * | BplusTree | +--------------------+ + * | .properties +-----| BplusStdProperties | + * | .n_element | | .branching_factor | + * | .root | | .debug_flag | + * +------+------+ +--------------------+ + * | + * | .root + * | + * +-------------------+ +--------------+ +--------------+ + * | GenericNode | isa | LeafNode | .elt_v[i] | LeafNodeItem | + * | .node_type |<---+----| .elt_v[] +-------------| .kv_pair | + * | .parent | | | | | | + * | .n_elt 
| | +--------------+ +--------------+ + * | .branching_factor | | + * +-------------------+ | + * | + * | +--------------+ +------------------+ + * | | InternalNode | .elt_v[i] | InternalNodeItem | + * \----| .elt_v[] +-------------| .key | + * | | | .child | + * +--------------+ +------------------+ + * + * Invariants: + * - tree is always balanced -- every path from root to a LeafNode, visits the same number of InternalNodes. + * - all Nodes (both LeafNodes and InternalNodes) satisfy bf/2 <= .n_elt <= bf (where bf = BplusTree.properties.branching_factor) + * Details + * - if InternalNode p has p.elt_v[i].child = q, then q.parent = p + * - GenericNode.branching_factor = BplusTree.properties.branching_factor for all nodes in the same BplusTree + * + * Tree with 0 key/value pairs + * + * +--------------+ + * | BplusTree | + * | .root = null | + * +--------------+ + * + * Tree with [1 .. b] key/value pairs (with b = BplusTree.properties.branching_factor) + * + * +---------------+ + * | BplusTree | + * | .root = node1 | + * +--------+------+ + * | + * node1 | + * +----------------------------------------------+ + * | LeafNode | + * | .parent = null | + * | | + * | .elt_v[0] .elt_v[b-1] | b = BplusTree.properties.branching_factor - 1 + * | +----------------+- ... -+-----------------+ | .elt_v[i].kv_pair.first = i'th key + * | | k0 | v0 | | k(b-1) | v(b-1) | | .elt_v[i].kv_pair.second = i'th value + * | +----------------+- ... -+-----------------+ | + * +----------------------------------------------+ + * + * Tree with [b+1 ..] key/value pairs + * + * +---------------+ + * | BplusTree | + * | .root = node1 | + * +--------+------+ + * | + * node1 | + * +----------------------------------------------+ + * | InternalNode | + * | .parent = null | + * | | + * | .elt_v[0] .elt_v[b-1] | + * | +----------------+- ... -+-----------------+ | .elt_v[i].key = minimum key in subtree .elt_v[i].child + * | | k0 | node2 | | k(b-1) | node(b)| | + * | +----------------+- ... 
-+-----------------+ | + * | .key .child .key .child | + * +-------------------+-----+--------------------+ + * | | + * | | ... + * | | + * .elt_v[0].child | | .elt_v[1].child + * /--------------------/ \----------------------------\ + * | | + * node2 | node3 | + * +----------------------------------------------+ +-------------------------------------------------+ + * | LeafNode | | LeafNode | + * | .parent = node1 | | .parent = node1 | + * | | | | + * | .elt_v[0] .elt_v[b-1] | | .elt_v[0] .elt_v[b-1] | ..... + * | +----------------+- ... -+-----------------+ | | +--------+--------+- ... -+---------+---------+ | + * | | k0 | v0 | | k(b-1) | v(b-1) | | | | kb | vb | | k(2b-1) | v(2b-1) | | + * | +----------------+- ... -+-----------------+ | | +-----------------+- ... -+---------+---------+ | + * +----------------------------------------------+ +-------------------------------------------------+ + * + * + * Larger trees havedadditional levels comprising InternalNodes. + * + */ + + /* NullReduce: 0-size reduce function (disappears at compile time) */ + template + struct NullReduce; + + struct Machdep { + /* current page size (on a linux system). Probably 4K + * + * (a) need this to be at least large enough to + * hold 3 keys + * (b) note linux page size isn't fixed at compile time + */ + static inline std::size_t get_page_size() { + return ::sysconf(_SC_PAGESIZE); + } + + /* L1 cache line size (on a linux system). Probably 64 bytes */ + static inline std::size_t get_cache_line_size() { + // https://sourceforge.net/p/predef/wiki/OperatingSystems/ +# if __APPLE__ && __MACH__ + std::size_t line_size = 0; + std::size_t sizeof_line_size = sizeof(line_size); + ::sysctlbyname("hw.cachelinesize", + &line_size, &sizeof_line_size, 0, 0); + return line_size; +# else + return ::sysconf(_SC_LEVEL1_DCACHE_LINESIZE); +# endif + } + }; /*Machdep*/ + + /* B+ tree nodes come in several flavors: {root | internal | leaf}: + * + * - root. 
each B+ tree has exactly one node of this type, + * representing the root of the B+ tree. + * The root node is subject to fewer restrictions than other + * nodes in a B+ tree: + * - can have 2..b elements (where b is branching factor for this B+ tree). + * - can function as tree's one and only leaf node (if tree has <= b items). + * - can function as an internal node (if tree has > b items) + * + * - internal. an internal node has: + * - n child node pointers, subject to ceil(b/2) <= n <= b, + * where b is tree's branching factor + * - n keys. key[j] is the smallest key value in subtree j. + * see InternalNode + * + * - leaf. a leaf node has: + * - n keys, subject to ceil(b/2) <= n <= b, + * where b is tree's branching factor + * - n values. values are stored as pointers. + * - pointer to next leaf node, to streamline inorder traversal + */ + template + struct BplusStdProperties { + public: + using KeyType = Key; + using ValueType = Value; + + public: + BplusStdProperties() = default; + explicit BplusStdProperties(std::size_t bf, bool debug_flag) + : branching_factor_{bf}, debug_flag_{debug_flag} {} + + static constexpr tags::ordinal_tag ordinal_tag_value() { return OrdinalTag; } + static constexpr bool ordinal_enabled() { return OrdinalTag == tags::ordinal_enabled; } + + static constexpr std::size_t c_min_branching_factor = 3; + + /* compute branching factor for given (leaf) node size z, in bytes. + * + * result is never below c_min_branching_factor. + * when z is too small to hold even the leaf node header, return the minimum + * directly: the unsigned subtraction below would otherwise wrap around + * and report an enormous branching factor + */ + static constexpr std::size_t branching_factor_for_size(std::size_t z) { + if (z <= sizeof(LeafNode)) + return c_min_branching_factor; + + return std::max(c_min_branching_factor, + (z - sizeof(LeafNode)) + / (sizeof(LeafNodeItemPlaceholder))); + } /*branching_factor_for_size*/ + + /* default branching factor. + * attempt to optimize for cache efficiency of 'internal' nodes + * + * minimum branching factor always 3 + */ + static constexpr std::size_t default_branching_factor() { + return branching_factor_for_size(Machdep::get_page_size()); + } + + /* expect this will be min branching factor + * (i.e.
smallest allowed LeafNode size likely won't fit in cache line): + * + * - cache line size = 64 bytes + * - leaf node overhead = 56 bytes + * - leaf node item size = 16 bytes + */ + static constexpr std::size_t default_cacheline_branching_factor() { + return branching_factor_for_size(Machdep::get_cache_line_size()); + } + + std::size_t branching_factor() const { return branching_factor_; } + bool debug_flag() const { return debug_flag_; } + + void set_debug_flag(bool x) { debug_flag_ = x; } + + private: + /* branching factor to use for both leaf and internal B+ tree nodes */ + std::size_t branching_factor_ = default_branching_factor(); + /* if true enable verbose logging during B+ tree operations */ + bool debug_flag_ = false; + }; /*BplusStdProperties*/ + + /* stream a BplusStdProperties value to os; returns os to allow chaining */ + template + inline std::ostream & + operator<<(std::ostream & os, + BplusStdProperties const & p) + { + using xo::xtag; + + /* NOTE(review): the literal written here is empty and neither p nor xtag is used; + * presumably angle-bracketed output text was lost -- confirm against upstream + */ + os << ""; + + return os; + } /*operator<<*/ + + /* B+ tree with order statistics + * + * require: + * - Key is equality comparable, and imposes total ordering on keys.
+ * - Key, Value, Reduce, Properties are copyable and null-constructible + * - Reduce.value_type = Accumulator + * - Reduce.operator() :: (Accumulator x Key) -> Accumulator + */ + template , + typename Properties = BplusStdProperties> + class BplusTree { + public: + using GenericNodeType = GenericNode; + using InternalNodeType = InternalNode; + using LeafNodeType = LeafNode; + using InternalNodeItemType = InternalNodeItem; + using LeafNodeItemType = LeafNodeItem; + using BpTreeConstLhs = detail::BplusTreeConstLhs>; + using BpTreeUtil = BplusTreeUtil; + + using key_type = Key; + using mapped_type = Value; + using value_type = std::pair; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + // key_compare + // allocator_type + using reference = value_type &; + using const_reference = value_type const &; + // pointer = std::allocator_traits::pointer; + // const_pointer = std::allocator_traits::const_pointer; + using const_iterator = detail::ConstIterator; + // reverse_iterator + // const_reverse_iterator + // value_compare (compares value_type objects by comparing their first elements) + + public: + BplusTree() = default; + explicit BplusTree(Properties const & properties) : properties_{properties} {} + + bool empty() const { return this->n_element_ == 0; } + size_type size() const { return this->n_element_; } + size_type max_size() const { return std::numeric_limits::max(); } + std::size_t branching_factor() const { return this->properties_.branching_factor(); } + + bool debug_flag() const { return this->properties_.debug_flag(); } + void set_debug_flag(bool x) { this->properties_.set_debug_flag(x); } + + /* verify b+ tree invariants. + * if invariants satisfied, return true. 
+ * if not satisfied, + * - throw_flag=true -> throw exception + * - throw_flag=false -> return false; + */ + bool verify_ok(bool throw_flag = true) const { + using xo::scope; + using xo::xtag; + + //scope x("verify_ok"); + //x.log(xtag("n_element", this->n_element_)); + + /* number of keys counted by walking the tree */ + std::size_t z = 0; + + try { + z = this->verify_helper(throw_flag); + } catch (...) { + /* propagate only when caller asked for exceptions; otherwise report failure */ + if (throw_flag) + throw; + + return false; + } + + //x.log(xtag("z", z)); + + /* counted keys must agree with the stored element count. + * verify_helper() returns -1 (converted to std::size_t) for an inconsistency + * it detects with throw_flag=false; that value is rejected by this comparison + */ + if (z != this->n_element_) { + if (throw_flag) { + std::string err = tostr("BplusTree::verify_ok" + ": bad key count", + xtag("expected", this->n_element_), + xtag("counted", z)); + + throw std::runtime_error(err); + } + + return false; + } + + return true; + } /*verify_ok*/ + + /* cxxx: const iterator + * rxxx: reverse iterator + * crxxx: const reverse iterator + * + * forward endpoints are built from .leafnode_begin_ (begin) and .leafnode_end_ (end); + * reverse endpoints swap the roles of the two leaf pointers + */ + + const_iterator cprebegin() const { return const_iterator::prebegin_aux(this->leafnode_begin_); } + const_iterator cbegin() const { return const_iterator::begin_aux(this->leafnode_begin_); } + const_iterator cend() const { return const_iterator::end_aux(this->leafnode_end_); } + + const_iterator begin() const { return this->cbegin(); } + const_iterator end() const { return this->cend(); } + + const_iterator crprebegin() const { return const_iterator::rprebegin_aux(this->leafnode_end_); } + const_iterator crbegin() const { return const_iterator::rbegin_aux(this->leafnode_end_); } + const_iterator crend() const { return const_iterator::rend_aux(this->leafnode_begin_); } + + const_iterator rbegin() const { return this->crbegin(); } + const_iterator rend() const { return this->crend(); } + + /* find item with key equal to x in this tree.
+ * success -> return iterator ix with ix->first = x + * failure -> return iterator this->cend() (this includes lookup in an empty tree) + */ + const_iterator find(Key const & x) const { + FindNodeResult leaffindresult = this->find_leaf_node(x); + LeafNodeType const * leaf = leaffindresult.node(); + + /* find_leaf_node() yields a default FindNodeResult when tree has no root; + * there is no leaf to search in that case + */ + if (!leaf) + return this->cend(); + + /* .first: true iff x is present in leaf + * .second: strict upper bound index for x, so a matching item sits at .second - 1 + */ + std::pair lub_ix_recd = leaf->find_lub_ix(x); + + if (lub_ix_recd.first) { + return const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf, + lub_ix_recd.second - 1); + } else { + return this->cend(); + } + } /*find*/ + + /* find i'th key/value pair (in key order) in this tree. + * + * Require: + * - 0 <= i < .size + * + * throws std::runtime_error when i is out of range + */ + const_iterator find_ith(std::size_t i_tree) const { + using xo::tostr; + using xo::xtag; + + if (i_tree >= this->size()) { + throw std::runtime_error(tostr("BplusTree::find_ith: expected index i in range [0..n)", + xtag("i", i_tree), + xtag("n", this->size()))); + } + + GenericNodeType * generic_node = this->root_.get(); + + return BplusTreeUtil::find_ith(generic_node, i_tree, this->cend()); + } /*find_ith*/ + + /* checked lookup: throws std::out_of_range when k is not present in tree */ + BpTreeConstLhs at(Key const & k) const { + const_iterator ix = this->find(k); + + if (ix == this->cend()) { + throw std::out_of_range(tostr("BplusTree::at: expected key argument to appear in tree", + xtag("key", k))); + } + + return BpTreeConstLhs(this, ix.item_addr()); + } /*at*/ + + /* e.g.
+ * BplusTree bptree = ...; + * Key key = ...; + * BplusTree::value_type x = bptree[key]; + * + * NOTE(review): unlike at(), there is no check that k is present; ix may equal cend() here -- + * confirm item_addr() is safe to call on an end iterator + */ + BpTreeConstLhs operator[](Key const & k) const { + const_iterator ix = this->find(k); + + return BpTreeConstLhs(this, ix.item_addr()); + } /*operator[]*/ + + /* reset to empty tree: zero element count, null leaf endpoints, release root */ + void clear() { + this->n_element_ = 0; + this->leafnode_begin_ = nullptr; + this->leafnode_end_ = nullptr; + this->root_.reset(nullptr); + } /*clear*/ + + /* TODO: + * std::pair insert(value_type const & kv_pair); + * + * template + * std::pair insert(P && value) + * + * std::pair insert(value_type && kv_pair); + * + * iterator insert(iterator pos, value_type const & kv_pair); + * iterator insert(const_iterator pos, value_type const & kv_pair); + * + * template + * iterator insert(const_iterator pos, P && value); + * + * iterator insert(const_iterator pos, value_type && value); + * + * template + * void insert(InputIterator lo, InputIterator hi); + * + * void insert(std::initializer_list initlist); + */ + + /* return: .second is true if key was not already present (tree size increases by 1) + * .second is false if key already existed (value replaced, tree size unchanged) + */ + std::pair insert(std::pair const & kv_pair) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag()), + xtag("key", kv_pair.first), + xtag("value", kv_pair.second), + xtag("root", this->root_.get()) + //xtag("nesting", x.nesting_level()) + ); + + log && log(xtag("bptree[before-insert]", (char const *)"...")); + if (log) this->print(std::clog, log.nesting_level()+2); + + std::pair retval; + + /* dispatch on root node type; an empty tree gets a fresh root */ + if (this->root_) { + NodeType root_type = this->root_->node_type(); + + log && log(xtag("root_type", root_type)); + + switch (root_type) { + case NodeType::leaf: + retval = this->leaf_insert_aux(kv_pair); + break; + case NodeType::internal: + retval = this->internal_insert_aux(kv_pair); + break; + } /*switch*/ + } else { + retval = this->create_root_aux(kv_pair); + } + + log && log(xtag("bptree[after-insert]", (char const *)"...")); + if (log) this->print(std::clog, 
log.nesting_level() + 2); + + log.end_scope(); + + return retval; + } /*insert*/ + + /* e.g: + * std::map m = ...; + * BplusTree bptree; + * + * bptree.insert(m.begin(), m.end()); + */ + template + void insert(InputIterator lo, InputIterator hi) { + for (InputIterator ix = lo; ix != hi; ++ix) + this->insert(*ix); + } /*insert*/ + + /* return: true if key existed (tree size decreased by 1) + * false if key not found + */ + bool erase(Key const & key) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag()), + xtag("key", key), + xtag("root", this->root_.get())); + + log && log(xtag("bptree[before-erase]", (char const *)"...")); + if (log) this->print(std::clog, log.nesting_level()+2); + + bool retval = false; + + if (this->root_) { + NodeType root_type = this->root_->node_type(); + + log && log(xtag("root_type", root_type)); + + switch (root_type) { + case NodeType::leaf: + retval = leaf_erase_aux(key); + break; + case NodeType::internal: + retval = internal_erase_aux(key); + break; + } /*switch*/ + } else { + /* tree empty, certainly doesn't contain key */ + } + + log && log(xtag("bptree[after-erase]", (char const *)"...")); + if (log) this->print(std::clog, log.nesting_level()+2); + + log.end_scope(); + + return retval; + } /*erase*/ + + void print(std::ostream & os, std::int32_t indent = 0) const { + using xo::xtag; + using xo::pad; + + os << pad(indent) << "print_aux(os, this->root_.get(), indent+2); + + os << ">"; + os << std::endl; + } /*print*/ + + private: + /* find leaf node associated with given key, within given subtree + * + * .first: index position of leaf in immediate parent of leaf node. 0 when leaf is also root node. 
+ * .second: leaf node + */ + static FindNodeResult find_leaf_node_aux(Key const & key, InternalNodeType * subtree_arg) { + FindNodeResult> findresult(0, subtree_arg); + + /* descend one level per iteration, following the child selected for key, until a non-internal node is reached */ + while (findresult.node() && (findresult.node()->node_type() == NodeType::internal)) { + findresult = (reinterpret_cast(findresult.node()))->find_child(key); + } + + /* findresult.node().node_type() == NodeType::leaf (if non-null) */ + + if (!findresult.node()) { + /* not expected to happen; with asserts disabled, fall back to a default (empty) result */ + assert(false); + return FindNodeResult(); + } + + assert(findresult.node()->node_type() == NodeType::leaf); + + /* subtree.canonical_node_type = leaf */ + return FindNodeResult(findresult.ix(), + reinterpret_cast(findresult.node())); + } /*find_leaf_node_aux*/ + + /* count #of keys present in this b+ tree, by visiting every node; + * but short-circuit if internal inconsistency detected + * + * throw_flag: if true, report an inconsistency by throwing std::runtime_error; + * if false, report it by returning -1 (converted to std::size_t) + */ + std::size_t verify_helper(bool throw_flag) const { + using xo::scope; + using xo::xtag; + + //scope x("BplusTree.verify_helper"); + + if (Properties::ordinal_tag_value() == tags::ordinal_enabled) { + /* verify tree size (maintained in each node) matches toplevel tree size) */ + if (this->root_ != nullptr) { + if (this->size() != BplusTreeUtil::get_node_size(this->root_.get())) { + if (throw_flag) { + throw std::runtime_error(tostr("BplusTree::verify_helper" + ": mismatched tree size computation", + xtag("root", this->root_.get()), + xtag("bptree.n_element", this->size()), + xtag("bptree.root.size", logutil::nodesize(this->root_.get())))); + } else { + return -1; + } + } + } + } else { + /* subtree size not maintained; skip test */ + } + + /* verify leafnode iterator endpoints */ + + if (this->root_ == nullptr) { + /* empty tree: both cached leaf endpoints must be null */ + if (this->leafnode_begin_ != nullptr || this->leafnode_end_ != nullptr) { + if (throw_flag) { + throw std::runtime_error(tostr("BplusTree::verify_helper" + ": expected null .leafnode_begin / .leafnode_end pointers" + " with empty tree", + xtag("root", this->root_.get()), + xtag("leafnode_begin", this->leafnode_begin_), +
xtag("leafnode_end", this->leafnode_end_))); + } else { + return -1; + } + } + + return 0; + } else { + auto leftmost_fr = this->root_->find_min_leaf_node(); + auto rightmost_fr = this->root_->find_max_leaf_node(); + + if ((leftmost_fr.node() != this->leafnode_begin_) + || (rightmost_fr.node() != this->leafnode_end_)) + { + if (throw_flag) { + throw std::runtime_error(tostr("BplusTree::verify_helper" + ": expected .leafnode_begin / .leafnode_end pointers" + " to match computed first/last leaf nodes", + xtag("root", this->root_.get()), + xtag("leafnode_begin[stored]", this->leafnode_begin_), + xtag("leafnode_begin[computed]", leftmost_fr.node()), + xtag("leafnode_end[stored]", this->leafnode_end_), + xtag("leafnode_end[computed]", rightmost_fr.node()))); + } else { + return -1; + } + } + } + + return this->root_->verify_helper(nullptr /*parent*/, + false /*!with_lub_flag*/, + Key() /*lub_key*/, + nullptr /*lh_leaf*/, + nullptr /*rh_leaf*/); + } /*verify_helper*/ + + /* find leaf node associated with given key; + * this is the node that would contain target key, if it is present. + */ + FindNodeResult find_leaf_node(Key const & key) { + if (!root_.get()) + return FindNodeResult(); + + switch (root_->node_type()) { + case NodeType::leaf: + return FindNodeResult(0, reinterpret_cast(root_.get())); + case NodeType::internal: + return find_leaf_node_aux(key, reinterpret_cast(root_.get())); + } + + assert(false); + return FindNodeResult(); + } /*find_leaf_node*/ + + FindNodeResult find_leaf_node(Key const & key) const { + FindNodeResult findresult = const_cast(this)->find_leaf_node(key); + + return FindNodeResult(findresult.ix(), findresult.node()); + } /*find_leaf_node*/ + + /* insert helper. 
+ * + * require: + * - root node is NodeType::leaf + * - returns true if new key; false if replace value associated with existing key + */ + std::pair + leaf_insert_aux(std::pair const & kv_pair) { + using xo::scope; + using xo::xtag; + + /* will add/replace key,value pair in existing root (which is a leaf) node */ + + scope log(XO_DEBUG(this->debug_flag()), + xtag("key", kv_pair.first), + xtag("value", kv_pair.second)); + + /* root node is a leaf node: + * - tree has between 1 and b elements (where b = branching factor) + */ + LeafNodeType * leaf = reinterpret_cast(this->root_.get()); + + log && log(xtag("leaf", leaf), + xtag("leaf.n_elt", leaf->n_elt()), + xtag("leaf.bf", leaf->branching_factor())); + + /* .elt_v[] + * + * 0 k n-1 with: n <= b = branching factor + * +---+---+- ... -+---+- ... -+---+---+ k = lub(key) in {e1..en} + * | e1| e2| | ek| | | en| + * +---+---+- ... -+---+- ... -+---+---+ + * + * lub_ix_recd.first: true if key already present in tree. implies lub_ix_recd.second >= 1 + * lub_ix_recd.second: upper bound (strict) index position in .elt_v[] of key + */ + std::pair lub_ix_recd = leaf->find_lub_ix(kv_pair.first); + + log && log(xtag("lub_ix_recd.first", lub_ix_recd.first), + xtag("lub_ix_recd.second", lub_ix_recd.second)); + + if (lub_ix_recd.first) { + leaf->assign_leaf_value(lub_ix_recd.second - 1, kv_pair.second); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf, + lub_ix_recd.second - 1), + false)); + } + + /* key not present in tree, so will be incrementing tree size */ + + if (leaf->n_elt() == leaf->branching_factor()) { + log && log("split root (leaf) node"); + + /* root node is full: + * 1. split into two leaf nodes + * 2. 
create new root node (internal instead of leaf) + */ + + ++(this->n_element_); + + /* take ownership of the old root as the lower leaf; upper receives the split-off items */ + std::unique_ptr lower(reinterpret_cast(this->root_.release())); + std::unique_ptr upper(lower->split_leaf_upper()); + + /* insert is made into this leaf */ + LeafNodeType * leaf_node = nullptr; + /* new (key, value) pair placed at this index position in leaf_node */ + std::size_t leaf_ix = 0; + + /* note corner case: + * when lub_ix_recd.second == lower->n_elt(), + * could either insert (key, value) as smallest key in upper subtree, or largest key in lower subtree. + * however if we put into upper, then also need to correct .root.elt_v_[1].key; + * slightly simpler to insert into lower subtree. + * + */ + if (lub_ix_recd.second <= lower->n_elt()) { + log && log("insert into new LH leaf node"); + leaf_node = lower.get(); + leaf_ix = lub_ix_recd.second; + } else { + log && log("insert into new RH leaf node"); + leaf_node = upper.get(); + leaf_ix = lub_ix_recd.second - lower->n_elt(); + } + + /* kv_pair is a const reference, so it cannot be moved from; pass it as-is */ + leaf_node->insert_leaf_item(leaf_ix, + kv_pair, + this->debug_flag()); + + /* create new root node (now with node-type = internal), + * having two child (leaf) nodes + */ + std::unique_ptr root_2 = InternalNodeType::make_2(std::move(lower), + std::move(upper)); + + /* new root node replaces existing root node */ + this->root_ = std::move(root_2); + + this->post_modify_correct_leafnode_endpoints(); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf_node, + leaf_ix), + true)); + } else if (leaf->n_elt() < this->properties_.branching_factor()) { + /* 1. key is not already present in b+ tree + * 2. leaf_ix+1 is lub on key; move elements [lub .. n_elt) one step to the right + * hope to move elements leaf_ix+1 .. n_elt-1 to the right, + */ + + /* leaf node has room for one more item */ + ++(this->n_element_); + /* insert in root node, moving items [lub_ix ..
.n_elt) one to the right */ + leaf->insert_leaf_item(lub_ix_recd.second, + kv_pair, + this->debug_flag()); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf, + lub_ix_recd.second), + true)); + } else { + /* impossible! */ + + assert(false); + return (std::pair + (const_iterator(), + false)); + } + } /*leaf_insert_aux*/ + + /* insert helper. + * + * require: + * - root node is NodeType::internal + * + * kv_pair: establish assocation kv_pair.first(=key) -> kv_pair.second(=value) + * return: true if insert performed; false if update on existing key + */ + std::pair + internal_insert_aux(std::pair const & kv_pair) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag()), + xtag("key", kv_pair.first), + xtag("value", kv_pair.second)); + + /* root node is an internal node: + * - tree has at least b elements (where b = branching factor) + */ + FindNodeResult leaffindresult = this->find_leaf_node(kv_pair.first); + LeafNodeType * leaf = leaffindresult.node(); + + log && log(xtag("leaf", leaf), + xtag("leaf.n_elt", leaf->n_elt()), + xtag("leaf.bf", leaf->branching_factor())); + + std::pair lub_ix_recd = leaf->find_lub_ix(kv_pair.first); + + log && log(xtag("lub_ix_recd.first", lub_ix_recd.first), + xtag("lub_ix_recd.second", lub_ix_recd.second)); + + if (lub_ix_recd.first) { + /* key already in tree, just updating associated value */ + leaf->assign_leaf_value(lub_ix_recd.second - 1, kv_pair.second); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf, + lub_ix_recd.second - 1), + false)); + } + + /* key not present in tree, will be incrementing tree size */ + ++(this->n_element_); + + if (leaf->n_elt() < leaf->branching_factor()) { + log && log("insert into existing leaf, since it has room"); + + /* leaf has room for 1 more item */ + leaf->insert_leaf_item(lub_ix_recd.second, + kv_pair, + this->debug_flag()); + + 
this->post_modify_add_ancestor_size(leaf->parent(), +1); + + /* whenever we insert at first key position, + * need also to update ancestor glb_key values + */ + { + InternalNodeType * ancestor = leaf->parent(); + + while (ancestor && (kv_pair.first < ancestor->glb_key())) { + ancestor->set_glb_key(kv_pair.first); + ancestor = ancestor->parent(); + } + } + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf, + lub_ix_recd.second), + true)); + } + + /* leaf is full. + * 1. split into two half-full leaf nodes + * 2. insert new (key, value) pair into one of the two + * half-full nodes. + * 3. recursively insert entry for new node into parent; + * possibly splitting parent and additional ancestor + * nodes as need be + */ + + std::unique_ptr new_node; + + /* key,value pair will be inserted into this leaf */ + LeafNodeType * leaf_node = nullptr; + /* key,value pair inserted into leaf at this index position */ + std::size_t leaf_ix = 0; + + if (lub_ix_recd.second < leaf->n_elt() / 2) { + /* will insert into lower_leaf */ + std::unique_ptr lower_leaf(leaf->split_leaf_lower()); + + /* lower_leaf holds lower half of leaf's original set of items. + * leaf now holds upper half of leaf's original set of items. + */ + + log && log("split leaf to get lower_leaf", + xtag("lower_leaf", lower_leaf.get()), + xtag("leaf.n_elt", leaf->n_elt()), + xtag("lower_leaf.n_elt", lower_leaf->n_elt())); + + assert(lub_ix_recd.second <= lower_leaf->n_elt()); + + /* this size temporarily excluded from tree */ + std::size_t decr_z = lower_leaf->size(); + + log && log("insert new key into (new) LH leaf"); + lower_leaf->insert_leaf_item(lub_ix_recd.second, + kv_pair, + this->debug_flag()); + + leaf_node = lower_leaf.get(); + leaf_ix = lub_ix_recd.second; + + new_node = std::move(lower_leaf); + + /* new_node may get attached to ree at non-obvious location. + * at this point it is not in tree. 
+ * + * to bookkeep node sizes, decrement now, then increment where new_node is reintroduced + */ + { + InternalNodeType * parent = leaf->parent(); + + BplusTreeUtil::post_modify_sub_ancestor_size(parent, decr_z, this->debug_flag()); + } + + /* however: leaf's glb increased -> + * need to patch state in (at least parent, possibly more) ancestors + */ + { + GenericNodeType * target = leaf; + InternalNodeType * parent = target->parent(); + + while (parent) { + std::size_t ix = parent->locate_child_by_address(target); + + assert(ix != static_cast(-1)); + + InternalNodeItemType & slot = parent->lookup_elt(ix); + + if (slot.key() == target->glb_key()) { + /* done with fixup */ + break; + } + + slot.set_key(target->glb_key()); + + target = parent; + parent = parent->parent(); + } + } + } else { + /* leaf is full: + * + * note that leaf->n_elt() shrinks across this call + * + * before: + * leaf: + * <-- b elements -> + * 0 1 b-1 + * +---+- ... -+---+ + * | e1| | eb| + * +---+- ... -+---+ + * + * after: + * leaf: upper_leaf: + * <-- b/2 elements -> <-- b/2 elements --> + * 0 1 h 0 + * +---+- ... -+---+ +---+- ... -+---+ + * | e1| | eh| |eh'| | eb| with eh'=e(h+1) + * +---+- ... -+---+ +---+- ... 
-+---+ + * + * note: if b odd, then: + * - leaf gets (b-1)/2 elements, + * - upper_leaf gets (b+1)/2 elements + */ + + /* will insert into upper_leaf */ + std::unique_ptr upper_leaf(leaf->split_leaf_upper()); + + /* leaf now holds lower half of its original set of items; + * upper holds upper half of leaf's original set of items + */ + + log && log("split leaf to get upper_leaf", + xtag("upper_leaf", upper_leaf.get()), + xtag("leaf.n_elt", leaf->n_elt()), + xtag("upper_leaf.n_elt", upper_leaf->n_elt())); + + assert(lub_ix_recd.second >= leaf->n_elt()); + + /* this size temporarily excluded from tree */ + std::size_t decr_z = upper_leaf->size(); + + log && log("insert new key into (new) RH leaf"); + upper_leaf->insert_leaf_item(lub_ix_recd.second - leaf->n_elt(), + kv_pair, + this->debug_flag()); + + leaf_node = upper_leaf.get(); + leaf_ix = lub_ix_recd.second - leaf->n_elt(); + + new_node = std::move(upper_leaf); + + /* new_node may get attached to tree at non-obvious location. + * at this point it is not in tree. + * + * to bookkeep node sizes, decrement now, then increment where new_node is reintroduced + */ + { + InternalNodeType * parent = leaf->parent(); + + BplusTreeUtil::post_modify_sub_ancestor_size(parent, decr_z, this->debug_flag()); + } + + /* leaf's glb unchanged, no glb fixup required here */ + } + + Key new_key = new_node->glb_key(); + std::size_t lub_ix = 0; + + InternalNodeType * ancestor = leaf->parent(); + + while (ancestor) { + /* invariant: need to add new_node to tree somewhere on path to ancestor. 
+ * new_node is a leaf|internal node with already-correct size, + * that isn't yet accounted for in this B+ tree + */ + + lub_ix = ancestor->find_lub_ix(new_key); + + log && log("fixup ancestors", + xtag("new_key", new_key), + xtag("new_node", new_node.get()), + xtag("new_node.size", logutil::nodesize(new_node.get())), + xtag("ancestor", ancestor), + xtag("lub_ix", lub_ix)); + + /* on this iteration, need to introduce (new_key, new_node) to ancestor */ + + if (ancestor->n_elt() < ancestor->branching_factor()) { + /* ordinal_enabled: #of elements in subtree new_node. + * ordinal_disabled: 0 + */ + std::size_t new_z = BplusTreeUtil::get_node_size(new_node.get()); + + log && log("insert into ancestor, since it has room", + xtag("ancestor.size[pre-insert]", logutil::nodesize(ancestor)), + xtag("new_z", logutil::nodesize(new_node.get()))); + + /* room for 1 more child */ + ancestor->insert_node(lub_ix, + std::move(new_node), + this->debug_flag()); + + /* if ordinal_enabled: increase .size on path from root down to and including ancestor + * otherwise no-op + */ + BplusTreeUtil::post_modify_add_ancestor_size(ancestor, new_z, this->debug_flag()); + this->post_modify_correct_ancestor_glb_keys(ancestor); + this->post_modify_correct_leafnode_endpoints(); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf_node, + leaf_ix), + true)); + } else { + log && log("pre-split (will split ancestor to make room for new node)", + xtag("ancestor", ancestor), + xtag("ancestor.size", logutil::nodesize(ancestor)), + xtag("new_node", new_node.get()), + xtag("new_node.size", logutil::nodesize(new_node.get()))); + + /* no room in ancestor, need to split */ + + std::unique_ptr upper_ancestor(ancestor->split_internal()); + + log && log("post-split", + xtag("ancestor", ancestor), + xtag("ancestor.size", logutil::nodesize(ancestor)), + xtag("upper_ancestor", upper_ancestor.get()), + xtag("upper_ancestor.size", 
logutil::nodesize(upper_ancestor.get()))); + + /* this size temporarily excluded from tree */ + std::size_t decr_z = BplusTreeUtil::get_node_size(upper_ancestor.get()); + + /* will add back size fom upper_ancestor (w/ +1 for insert), + * once we figure out where to attach it. + * ancestor.size already decreased via ancestor.split_internal() + */ + BplusTreeUtil::post_modify_sub_ancestor_size(ancestor->parent(), decr_z, this->debug_flag()); + + /* ancestor.n_elt reduced to 1/2 value before call to split_internal() */ + + std::size_t new_z = BplusTreeUtil::get_node_size(new_node.get()); + + if (lub_ix <= ancestor->n_elt()) { + log && log("insert into (existing post-split) LH ancestor"); + log && log(xtag("lub_ix", lub_ix), xtag("new_node", new_node.get()), xtag("new_z", new_z)); + + ancestor->insert_node(lub_ix, + std::move(new_node), + this->debug_flag()); + + /* note: updating entire ancestor chain, + * since next iteration will operate on upper_ancestor != ancestor + */ + BplusTreeUtil::post_modify_add_ancestor_size(ancestor, new_z, this->debug_flag()); + + log && log("LH ancestor size", + xtag("ancestor", ancestor), + xtag("ancestor.size", logutil::nodesize(ancestor))); + + /* note next loop iteration will fixup upper_ancestor. 
+ * upper_ancestor != ancestor + */ + this->post_modify_correct_ancestor_glb_keys(ancestor); + } else { + log && log("insert into (new) RH ancestor"); + log && log(xtag("ix", lub_ix - ancestor->n_elt()), + xtag("new_node", new_node.get()), + xtag("new_z", new_z)); + + upper_ancestor->insert_node(lub_ix - ancestor->n_elt(), + std::move(new_node), + this->debug_flag()); + + /* note: deferring update for ancestor's ancestors until next loop iter */ + BplusTreeUtil::node_add_size(upper_ancestor.get(), new_z); + + log && log("upper ancestor size", + xtag("upper_ancestor", upper_ancestor.get()), + xtag("upper_ancestor.size", logutil::nodesize(upper_ancestor.get()))); + + } + + /* setup for next loop iteration + * reminder: upper_ancestor.size was removed from computed treesize, + * will add back on subsequent iteration (when attaching new_node) + */ + new_key = upper_ancestor->glb_key(); + new_node = std::move(upper_ancestor); + ancestor = ancestor->parent(); + } + } + + log && log("root node was split -> create new root, adding one level"); + + /* if control comes here: + * 1. ancestor is null + * 2. root node was full + has been split. . + * root will become LH subtree of new root + * new_node will become RH subtree of new root + * 3.
new_node is not present in .root + */ + + log && log(xtag("root.n_elt", this->root_->n_elt()), + xtag("new_node.n_elt", new_node->n_elt())); + + this->root_ = std::move(InternalNodeType::make_2(std::move(this->root_), + std::move(new_node))); + + this->post_modify_correct_leafnode_endpoints(); + + return (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf_node, + leaf_ix), + true)); + } /*internal_insert_aux*/ + + std::pair + create_root_aux(std::pair const & kv_pair) { + /* create root, with one element */ + this->n_element_ = 1; + + std::unique_ptr leaf_node + = LeafNodeType::make(kv_pair, + this->properties_); + + this->leafnode_begin_ = leaf_node.get(); + this->leafnode_end_ = leaf_node.get(); + + std::pair retval + = (std::pair + (const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + leaf_node.get(), + 0), + true)); + + this->root_.reset(leaf_node.release()); + + return retval; + } /*create_root_aux*/ + + /* remove helper. + * + * require: + * - root node is NodeType::leaf + * - return true iff key found (in which case #of leaf node items decremented) + */ + bool leaf_erase_aux(Key const & key) { + using xo::scope; + using xo::xtag; + + LeafNodeType * leaf = reinterpret_cast(this->root_.get()); + + scope log(XO_DEBUG(this->debug_flag()), + xtag("leaf", leaf), + xtag("leaf.n_elt", leaf->n_elt()), + xtag("leaf.bf", leaf->branching_factor())); + + /* .elt_v[] + * + * 0 k n-1 with: n <= b = branching factor + * +---+---+- ... -+---+- ... -+---+---+ k = lub(key) in {e1..en} + * | e1| e2| | ek| | | en| + * +---+---+- ... -+---+- ... -+---+---+ + * + * lub_ix_recd.first: true if key already present in tree. 
implies lub_ix_recd.second >= 1 + * lub_ix_recd.second: upper bound (strict) index position in .elt_v[] of key + */ + std::pair lub_ix_recd = leaf->find_lub_ix(key); + + log && log(xtag("lub_ix_recd.first", lub_ix_recd.first), + xtag("lub_ix_recd.second", lub_ix_recd.second)); + + if (!lub_ix_recd.first) { + /* key is not present in tree --> don't modify anything */ + return false; + } + + /* key is present in tree --> will decrement tree size */ + + if (leaf->n_elt() > 1) { + --(this->n_element_); + + /* reminder: lub_ix_recd.second is strict upper bound */ + leaf->remove_leaf(lub_ix_recd.second - 1, + this->debug_flag()); + + } else { + --(this->n_element_); + + /* removed last node -> tree now empty */ + + this->root_.reset(); + + } + + this->post_modify_correct_leafnode_endpoints(); + + log.end_scope(); + + return true; + } /*leaf_erase_aux*/ + + /* remove helper. + * + * require: + * - root node is NodeType::internal + * - return true iff key found (in which case #of key,value pairs decremented) + */ + bool internal_erase_aux(Key const & key) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag()), + xtag("key", key)); + + std::size_t const bf = this->branching_factor(); + + /* root node is an internal node: + * - tree has at least b elements (where b = branching zfactor) + * + * this + * +------+ + * | | + * +------+ + * . + * . 
+ * +------+ + * | i| i = leaffindresult.ix() + * +------+ + * / | \ + * /-------/ | \--------\ + * | | | + * +------+ +------+ +------+ + * | | | | | j | j = lub_ix_recd.second - 1 + * +------+ +------+ +------+ + * leaf + */ + + FindNodeResult leaffindresult = this->find_leaf_node(key); + LeafNodeType * leaf = leaffindresult.node(); + + log && log(xtag("leaf", leaffindresult.node()), + xtag("leaf.n_elt", leaffindresult.node()->n_elt()), + xtag("leaf.loc", leaffindresult.ix()), + xtag("bf", bf)); + + std::pair lub_ix_recd = leaffindresult.node()->find_lub_ix(key); + + log && log(xtag("lub_ix_recd.first", lub_ix_recd.first), + xtag("lub_ix_recd.second", lub_ix_recd.second)); + + if (!lub_ix_recd.first) { + /* key not in tree */ + return false; + } + + /* key present in tree at leaf.elt_v[lub_ix_recd.second - 1] */ + + /* B+ balance invariant is sustained across remove + * + * if glb key changed, then have to propagate up ancestor chain + */ + --(this->n_element_); + + /* reminder: lub_ix_recd.second is strict upper bound */ + leaf->remove_leaf(lub_ix_recd.second - 1, + this->debug_flag()); + + InternalNodeType * parent = leaf->parent(); + + /* whenever we remove at first key position (with strict upper bound index 1), + * then glb key changed, so need also to update ancestor glb_key values + */ + if (lub_ix_recd.second == 1) { + /* glb_key for this leaf node changed (to larger value) */ + log && log("fix glb", + xtag("@", parent), + xtag("old-glb", parent->glb_key()), + xtag("new-glb", leaf->glb_key())); + + /* we dropped smallest key from [leaf] --> correct glb key for leaf in its immediate parent */ + parent->lookup_elt(leaffindresult.ix()).set_key(leaf->glb_key()); + + this->post_modify_correct_ancestor_glb_keys(parent); + } else { + /* removal from position >0 doesn't change glb key + * -> doesn't require ancestor updates + */ + } + + if (2 * leaf->n_elt() >= bf) { + /* after removal, leaf still has acceptable #of children */ + + /* must decrement tree size on 
path from root down to and including parent */ + this->post_modify_sub_ancestor_size(parent, +1); + + return true; + } else { + /* after removal, leaf will be too small. plan: + * - try redistributing from a neighboring leaf + * - if result too small, then merge with one of neighboring leaves; + * in this case merges may cascade upward to root + * - if root node shrinks to 1 child, that child becomes new root + */ + log && log("leaf too small after remove -> redistribute or shrink tree"); + + std::size_t leaf_ix = leaffindresult.ix(); + /* right_sibling_ix: position of sibling immediately after (leaf_ix, key), in parent */ + std::size_t right_sibling_ix = leaf_ix + 1; + + LeafNodeType * right_sibling = nullptr; + + if (right_sibling_ix < parent->n_elt()) { + /* consider merge with right sibling */ + right_sibling = reinterpret_cast(parent->lookup_elt(right_sibling_ix).child()); + + std::size_t n = leaf->n_elt() + right_sibling->n_elt(); + + if (n >= 2 * ((bf + 1) / 2)) { + /* can redistribute one or more nodes from right_sibling -> leaf + * e.g. + * if bf=3, require 4 nodes between leaf and rh sibling + * if bf=4, also require 4 nodes between leaf and rh sibling. 
+ * + * after redistribution: + * - leaf will have n/2 elements + * - right_sibling will have n - n/2 elements + */ + leaf->append_from_rh_sibling(n/2 - leaf->n_elt(), right_sibling); + + /* glb_key for right sibling changed, need to fix ancestor book-keeping */ + parent->lookup_elt(right_sibling_ix).set_key(right_sibling->glb_key()); + + this->post_modify_sub_ancestor_size(parent, +1); + this->post_modify_correct_ancestor_glb_keys(parent); + + return true; + } else { + log && log("reject redistrib from right sibling, not enough capacity"); + } + } else { + log && log("reject redistrib from right sibling, doesn't exist"); + } + + std::size_t left_sibling_ix = leaf_ix - 1; + LeafNodeType * left_sibling = nullptr; + + if (leaf_ix > 0) { + /* consider redistribution from left sibling */ + left_sibling = reinterpret_cast(parent->lookup_elt(left_sibling_ix).child()); + + std::size_t n = leaf->n_elt() + left_sibling->n_elt(); + + if (n >= 2 * ((bf + 1) / 2)) { + log && log("redistrib from left sibling"); + + std::size_t n_redistrib = n/2 - leaf->n_elt(); + + log && log(xtag("n/2", n/2), + xtag("leaf.n", leaf->n_elt()), + xtag("n_redistrib", n_redistrib)); + + /* can redistribute one or more nodes from left_sibling -> leaf + * after redistribution: + * - leaf will have n/2 elements + * - left_sibling will have n - n/2 elements + */ + leaf->prepend_from_lh_sibling(left_sibling, n_redistrib, this->debug_flag()); + + /* glb key for leaf changed, need to fix ancestor book-keeping */ + parent->lookup_elt(leaf_ix).set_key(leaf->glb_key()); + + this->post_modify_sub_ancestor_size(parent, +1); + this->post_modify_correct_ancestor_glb_keys(parent); + + return true; + } else { + log && log("reject redistib from left sibling, not enough capacity"); + } + } else { + log && log("reject redistrib from left sibling, doesn't exist"); + } + + /* control here + * -> not enough nodes to redistribute from either sibling + * -> must shrink #nodes in tree + */ + + if (right_sibling_ix < 
parent->n_elt()) { + assert(right_sibling); + + log && log("merge right sibling"); + + /* RH sibling exists -> merge with it (arbitrary choice if leaf_ix > 0) */ + + leaf->append_rh_sibling(right_sibling); + + /* right_sibling is now (effectively) empty, drop from parent; + * also fixup next_leafnode/prev_leafnode links to bypass + */ + parent->remove_node(right_sibling_ix, this->debug_flag()); + + /* note that glb_key for leaf did not change */ + + /* -1 .size on path from root down to and including parent */ + this->post_modify_sub_ancestor_size(parent, +1); + /* since we reduced #of children at parent, it may have fallen below b/2 lower bound */ + this->post_remove_shrink_ancestor_path(parent); + /* since we removed a leaf node, may have invalidated iterator begin/end endpoints */ + this->post_modify_correct_leafnode_endpoints(); + + return true; + } + + if (leaf_ix > 0) { + assert(left_sibling); + + /* LH sibling exists -> merge with it (arbitrary choice if right_sibling_ix < parent.n_elt */ + + left_sibling->append_rh_sibling(leaf); + + /* leaf is now (effectively) empty, drop from parent; + * also fixup next_leafnode/prev_leafnode links to bypass + */ + parent->remove_node(leaf_ix, this->debug_flag()); + + /* note that glb_key for left_sibling did not change */ + + this->post_modify_sub_ancestor_size(parent, +1); + /* since we reduced #of children at parent, it may have fallen below b/2 lower bound */ + this->post_remove_shrink_ancestor_path(parent); + /* since we removed a leaf node, may have invalidated iterator begin/end endpoints */ + this->post_modify_correct_leafnode_endpoints(); + + return true; + } + + /* must have at least one sibling (else prior visit would have shrunk tree height) */ + } + + log.end_scope(); + + assert(false); + return false; + } /*internal_erase_aux*/ + + void post_modify_add_ancestor_size(InternalNodeType * parent, std::size_t incr_z) { + BplusTreeUtil::post_modify_add_ancestor_size(parent, incr_z, this->debug_flag()); + } 
/*post_modify_add_ancestor_size*/ + + void post_modify_sub_ancestor_size(InternalNodeType * parent, std::size_t decr_z) { + BplusTreeUtil::post_modify_sub_ancestor_size(parent, decr_z, this->debug_flag()); + } /*post_modify_sub_ancestor_size*/ + + void post_modify_correct_ancestor_glb_keys(InternalNodeType * parent) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag()), + xtag("parent", parent)); + + InternalNodeType * grandparent = parent->parent(); + + std::size_t i_ancestor = 0;; + while (grandparent) { + log && log(xtag("i_ancestor", i_ancestor), + xtag("grandparent", grandparent)); + + /* find index position of parent subtree, as child of grandparent + * Can only use .find_ix() when key-invariants are satisfied. + * + * Warning: O(bf) call here + */ + std::size_t parent_ix = grandparent->locate_child_by_address(parent); + + log && log(xtag("parent.loc", parent_ix)); + + if (grandparent->lookup_elt(parent_ix).key() == parent->glb_key()) { + log && log("grandparent[parent.loc].key == parent.glb_key --> done"); + break; + } + + log && log("fix glb key in grandparent"); + grandparent->lookup_elt(parent_ix).set_key(parent->glb_key()); + + /* + repeat 1 level up.. 
*/ + parent = grandparent; + grandparent = parent->parent(); + } + } /*post_modify_correct_ancestor_glb_keys*/ + + /* reset .leafnode_begin, .leafnode_end after changing the set of nodes in a b+ tree */ + void post_modify_correct_leafnode_endpoints() { + if (root_) { + this->leafnode_begin_ = root_->find_min_leaf_node().node(); + this->leafnode_end_ = root_->find_max_leaf_node().node(); + } else { + this->leafnode_begin_ = nullptr; + this->leafnode_end_ = nullptr; + } + } /*post_modify_correct_leafnode_endpoints*/ + + void post_remove_shrink_ancestor_path(InternalNodeType * node) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(this->debug_flag())); + + std::size_t const bf = node->branching_factor(); + + while (node + && (node != this->root_.get())) { + + log && log(xtag("node", node), + xtag("node.n_elt", node->n_elt())); + + if (2 * node->n_elt() >= bf) + break; + + /* node has fewer children than B+ minimum. + * either: + * - redistribute nodes from sibling + * (so that merged node satisfies bf/2 <= n <= bf) + * - merge with sibling + */ + InternalNodeType * parent = node->parent(); + + /* O(bf), but doesn't rely on satisfied key invariants */ + std::size_t node_ix = parent->locate_child_by_address(node); + std::size_t right_sibling_ix = node_ix + 1; + + InternalNodeType * right_sibling = nullptr; + + if (right_sibling_ix < parent->n_elt()) { + /* consider redistributng from right sibling */ + right_sibling = reinterpret_cast(parent->lookup_elt(right_sibling_ix).child()); + + std::size_t n = node->n_elt() + right_sibling->n_elt(); + + if (n >= 2 * ((bf + 1) / 2)) { + log && log("redistribute from right_sibling", + xtag("lh.n", node->n_elt()), + xtag("rh.n", right_sibling->n_elt())); + + /* can redistribute one or more nodes from right_sibling -> node + * + * after redistribution: + * - node will have floor(n/2) elements + * - right_sibling will have ceil(n/2) = n - floor(n/2) elements + */ + node->append_from_rh_sibling(n/2 - node->n_elt(), 
right_sibling); + + /* glb_key for right sibling changed, need to fixup ancestor book-keeping */ + this->post_modify_correct_ancestor_glb_keys(right_sibling); + + return; + } + } + + std::size_t left_sibling_ix = node_ix - 1; /* but beware underflow when node_ix=0 */ + + InternalNodeType * left_sibling = nullptr; + + if (node_ix > 0) { + /* consider redistributing from left sibling */ + left_sibling = reinterpret_cast(parent->lookup_elt(left_sibling_ix).child()); + + std::size_t n = node->n_elt() + left_sibling->n_elt(); + + if (n >= 2 * ((bf + 1) / 2)) { + log && log("redistribute from left_sibling", + xtag("lh.n", left_sibling->n_elt()), + xtag("rh.n", node->n_elt())); + + /* redistribute one or more nodes from left_sibling -> node */ + node->prepend_from_lh_sibling(left_sibling, + n/2 - node->n_elt(), + this->debug_flag()); + + /* glb_key for node changed, need to fixup ancestor book-keeping */ + this->post_modify_correct_ancestor_glb_keys(node); + + return; + } + } + + log && log("cannot redistribute -> drop a node"); + + /* control here + * -> not enough nodes to redistribute from either sibling + * -> must shrink number of nodes in tree + */ + + if (right_sibling_ix < parent->n_elt()) { + assert(right_sibling); + + /* RH sibling exists -> merge with it */ + + node->append_rh_sibling(right_sibling); + + /* right sibling now empty, drop from parent */ + parent->remove_node(right_sibling_ix, this->debug_flag()); + } else if (node_ix > 0) { + assert(left_sibling); + + /* LH sibling exists -> merge with it */ + + left_sibling->append_rh_sibling(node); + + /* node is now empty, drop from parent */ + parent->remove_node(node_ix, this->debug_flag()); + } + + /* continue tree fixup at parent node */ + node = parent; + } + + /* if node != root: tree shrank successfully, without propgating to root */ + + if ((node == this->root_.get()) && (node->n_elt() == 1)) + { + /* replace root with its single child element; tree height shrinks by one */ + this->root_ = 
std::move(node->lookup_elt(0).release_child()); + + this->root_->set_parent(nullptr); + } + } /*post_remove_shrink_ancestor_path*/ + + void print_aux(std::ostream & os, + GenericNodeType const * node, + std::uint32_t indent) const + { + using xo::xtag; + + if (node) { + switch(node->node_type()) { + case NodeType::internal: + { + using xo::pad; + + InternalNodeType const * internal = reinterpret_cast(node); + + for (std::uint32_t i=0, n=internal->n_elt(); ilookup_elt(i).child()->n_elt()) + << xtag("treez", logutil::nodesize(internal->lookup_elt(i).child())) + << xtag("glb", internal->lookup_elt(i).key()) + << xtag("@", internal->lookup_elt(i).child()); + + this->print_aux(os, + internal->lookup_elt(i).child(), + indent+1); + } + } + break; + case NodeType::leaf: + { + using xo::pad; + + LeafNodeType const * leaf = reinterpret_cast(node); + + for (std::uint32_t i=0, n=leaf->n_elt(); ilookup_elt(i).key() + << ": " << leaf->lookup_elt(i).value(); + } + } + break; + } + } else { + //os << std::endl; + } + } /*print_aux*/ + + private: + /* tree properties, in particular: branching factor */ + Properties properties_; + + /* #of items in this tree */ + std::size_t n_element_ = 0; + + /* left-most leaf node for inorder traversal */ + LeafNodeType * leafnode_begin_ = nullptr; + /* right-most leaf node for inorder traversal */ + LeafNodeType * leafnode_end_ = nullptr; + + /* tree + * size root depth + * ------------------------- + * 0 nullptr 0 + * 1..b LeafNode 1 + * >b InternalNode >1 + */ + std::unique_ptr root_; + }; /*BplusTree*/ + + } /*namespace tree*/ +} /*namespace xo*/ + +/* end BplusTree.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/RedBlackTree.hpp b/xo-ordinaltree/include/xo/ordinaltree/RedBlackTree.hpp new file mode 100644 index 00000000..7a3ea1e4 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/RedBlackTree.hpp @@ -0,0 +1,3158 @@ +/* @file RedBlackTree.hpp */ + +/* provides red-black tree with order statistics. 
+ */ + +#pragma once + +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/pad.hpp" +#include "xo/indentlog/print/quoted.hpp" +#include +#include +#include +#include +#include +#include + +namespace xo { + namespace tree { + + /* concept for the 'Reduce' argument to RedBlackTree<...> + * + * here: + * T = class implementing reduce feature, e.g. SumReduce<...> + * T::value_type = type for output of reduce function. + * + * Value = value_type for rb-tree that supports ordinal statistics + * + * e.g. + * struct ReduceCountAndSum { + * using value_type = std::pair: + * + * value_type nil() { return value_type(0, 0); } + * value_type operator()(value_type const & acc, int64_t val) + * { return value_type(acc.first + val.first, acc.second + val.second); } + * value_type operator()(value_type const & a1, value_type const & a2) + * { return value_type(a1.first + a2.first, a1.second + a2.second); } + * }; + * + * Reduce.nil() -> nominal reduction i.e. reduce on empty set + * Reduce.leaf(v) -> reduction on set {v} + * + * in general: at some internal node, tree splits set of key/value pairs on some key k1, + * with a left subtree lh, and a right subtree rh. 
+ * + * for a binary tree we want to maintain: + * - r1: reduce applied to collection + * lh + {k1} = reduce(reduce(lh), k1) + * - r2: reduce applied to collection + * lh + {k1} + rh = reduce.combine(r1, reduce(r2)) + * + */ + template + concept ReduceConcept = requires(T r, Value v, typename T::value_type a) { + typename T::value_type; + { r.nil() } -> std::same_as; + { r.leaf(v) } -> std::same_as; + { r(a, v) } -> std::same_as; + { r.combine(a, a) } -> std::same_as; + }; + + /* reduce function that disappears at compile time */ + template + struct NullReduce; + + /* red-black tree with order statistics + * + * require: + * - Key is equality comparable + * - Key, Value, Reduce are copyable and null-constructible + * - Reduce.value_type = Accumulator + * - Reduce.operator() :: (Accumulator x Key) -> Accumulator + * - Reduce.operator() :: (Accumulator x Accumulator) -> Accumulator + */ + template > + class RedBlackTree; + + namespace detail { + enum Color { C_Invalid = -1, C_Black, C_Red, N_Color }; + + enum Direction { D_Invalid = -1, D_Left, D_Right, N_Direction }; + + inline Direction other(Direction d) { + return static_cast(1 - d); + } /*other*/ + + template + class RbTreeUtil; + + /* xo::tree::detail::Node + * + * Require: + * - Key.operator< + * - Key.operator== + * + */ + template + class Node { + public: + using ReducedValue = typename Reduce::value_type; + using ContentsType = std::pair; + using value_type = std::pair; + + public: + Node() = default; + Node(value_type const & kv_pair, + std::pair const & r) + : color_(C_Red), size_(1), contents_{kv_pair}, reduced_(r) {} + Node(value_type && kv_pair, + std::pair && r) + : color_(C_Red), size_(1), + contents_{std::move(kv_pair)}, + reduced_{std::move(r)} {} + + static Node * make_leaf(value_type const & kv_pair, + ReducedValue const & leaf_rv) { + return new Node(kv_pair, + std::pair(leaf_rv, leaf_rv)); + } /*make_leaf*/ + + static Node * make_leaf(value_type && kv_pair, + ReducedValue const & leaf_rv) { + 
return new Node(std::move(kv_pair), + std::pair(leaf_rv, leaf_rv)); + } /*make_leaf*/ + + /* return #of key/value pairs in tree rooted at x. */ + static size_t tree_size(Node *x) { + if (x) + return x->size(); + else + return 0; + } /*tree_size*/ + + static bool is_black(Node *x) { + if (x) + return x->is_black(); + else + return true; + } /*is_black*/ + + static bool is_red(Node *x) { + if (x) + return x->is_red(); + else + return false; + } /*is_red*/ + + static Direction child_direction(Node *p, Node *n) { + if (p) { + return p->child_direction(n); + } else { + return D_Invalid; + } + } /*child_direction*/ + + static ReducedValue reduce_aux(Reduce reduce, Node *x) + { + if(x) + return x->reduced2(); + else + return reduce.nil(); + } /*reduce_aux*/ + + /* calculate reduced values for node x. + * does not use x.reduced + */ + static std::pair reduced_pair(Reduce r, Node const * x) + { + if(!x) + assert(false); + + ReducedValue r1 = r(reduce_aux(r, x->left_child()), + x->value()); + ReducedValue r2 = r.combine(r1, + reduce_aux(r, x->right_child())); + return std::pair(r1, r2); + } /*reduced_pair*/ + + /* replace root pointer *pp_root with x; + * set x parent pointer to nil + */ + static void replace_root_reparent(Node *x, Node **pp_root) { + *pp_root = x; + if (x) + x->parent_ = nullptr; + } /*replace_root_reparent*/ + + size_t size() const { return size_; } + /* const access */ + ContentsType const & contents() const { return contents_; } + /* non-const value access.
+ * + * editorial: would prefer to return + * std::pair & + * here, so that tree[k].first = newk + * prohibited, but std::pair + * is considered unrelated to std::pair, + * so l-value conversion not allowed + */ + ContentsType & contents() { return contents_; } + + Node *parent() const { return parent_; } + Node *child(Direction d) const { return child_v_[d]; } + Node *left_child() const { return child_v_[0]; } + Node *right_child() const { return child_v_[1]; } + ReducedValue const & reduced1() const { return reduced_.first; } + ReducedValue const & reduced2() const { return reduced_.second; } + + /* true if this node has 0 children */ + bool is_leaf() const { + return ((child_v_[0] == nullptr) && (child_v_[1] == nullptr)); + } + + /* identify which child x represents + * Require: + * - x != nullptr + * - x is either this->left_child() or this->right_child() + */ + Direction child_direction(Node *x) { + if (x == this->left_child()) + return D_Left; + else if (x == this->right_child()) + return D_Right; + else + return D_Invalid; + } /*child_direction*/ + + bool is_black() const { return this->color_ == C_Black; } + bool is_red() const { return this->color_ == C_Red; } + + bool is_red_left() const { return is_red(this->left_child()); } + bool is_red_right() const { return is_red(this->right_child()); } + + /* true if this node is red, and either child is red */ + bool is_red_violation() const { + if (this->color_ == C_Red) { + Node *left = this->left_child(); + Node *right = this->right_child(); + + if (left && left->is_red()) + return true; + + if (right && right->is_red()) + return true; + } + + return false; + } /*is_red_violation*/ + + Color color() const { return color_; } + Key const & key() const { return contents_.first; } + Value const & value() const { return contents_.second; } + + /* recalculate size from immediate childrens' sizes + * editor bait: recalc_local_size() + */ + void local_recalc_size(Reduce const & reduce_fn) { + using xo::scope; + using 
xo::xtag; + + //constexpr char const * c_self = "Node::local_recalc_size"; + constexpr bool c_logging_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + this->size_ = (1 + + Node::tree_size(this->left_child()) + + Node::tree_size(this->right_child())); + + /* (note: want reduce applied to all of left subtree) */ + this->reduced_ = Node::reduced_pair(reduce_fn, this); + + log && log("done recalc for key k, value v, reduced r", + xtag("k", this->key()), + xtag("v", this->value()), + xtag("r1", this->reduced1()), + xtag("r2", this->reduced2())); + } /*local_recalc_size*/ + + private: + void assign_color(Color x) { this->color_ = x; } + void assign_size(size_t z) { this->size_ = z; } + + void assign_child_reparent(Direction d, Node *new_x) { + Node *old_x = this->child_v_[d]; + + // trying to fix old_x can be counterproductive, + // since old_x->parent_ may already have been corrected, + // + if (old_x && (old_x->parent_ == this)) + old_x->parent_ = nullptr; + + this->child_v_[d] = new_x; + + if (new_x) { + new_x->parent_ = this; + } + } /*assign_child_reparent*/ + + /* replace child that points to x, with child that points to x_new + * and return direction of the child that was replaced + * + * Require: + * - x is a child of *this + * - x_new is not a child of *this + * + * promise: + * - x is nullptr or x.parent is nullptr + * - x_new is nullptr or x_new.parent is this + */ + Direction replace_child_reparent(Node *x, Node *x_new) { + Direction d = this->child_direction(x); + + if (d == D_Left || d == D_Right) { + this->assign_child_reparent(d, x_new); + return d; + } else { + return D_Invalid; + } + } /*replace_child_reparent*/ + + friend class RbTreeUtil; + friend class xo::tree::RedBlackTree; + + private: + /* red | black */ + Color color_ = C_Red; + /* size of subtree (#of key/value pairs) rooted at this node */ + size_t size_ = 0; + /* .first = key associated with this node + * .second = value associated with this node + * .third = reduced value + 
*/ + ContentsType contents_; + /* accumulator for some binary function of Values. + * must be associative, since value will be produced + * by any testing of calls to Reduce::combine(). + * + * e.g. {a, b, c, d} could be reduced by: + * r(r(a,b), r(c,d)) + * or + * r(a, r(r(b, c), d)) + * etc. + * + * examples: + * - count #of keys + * - sum key values + * + * .reduced.first: reduce applied to all values with keys <= .contents.first + * .reduced.second: reduce applied to all values in this subtree. + */ + std::pair reduced_; + /* pointer to parent node, nullptr iff this is the root node */ + Node *parent_ = nullptr; + /* + * .child_v[0] = left child + * .child_v[1] = right child + * + * invariants: + * - if .child_v[x] non-null, then .child_v[0]->parent = this + * - a red node may not have red children + */ + std::array child_v_ = {nullptr, nullptr}; + }; /*Node*/ + + enum IteratorDirection { + /* ID_Forward. forward iterator + * ID_Reverse. reverse iterator + */ + ID_Forward, + ID_Reverse + }; /*IteratorDirection*/ + + /* specify iterator location relative to Iterator::node. + * using this to make it possible to correctly decrement an + * iterator at RedBlackTree::end(). + * + * IL_BeforeBegin. if non-empty tree, .node is the first node + * in the tree (the one with smallest key), + * and iterator refers to the location + * "one before" that first node. + * IL_Regular. iterator refers to member of the tree + * given by Iterator::node + * IL_AfterEnd. if non-empty tree, .node is the last node + * in the tree (the one with largest key), + * and iterator refers the the location + * "one after" that last node. + */ + enum IteratorLocation { + IL_BeforeBegin, + IL_Regular, + IL_AfterEnd, + }; /*IteratorLocation*/ + + /* require: + * - Reduce::value_type + */ + template + class RbTreeUtil { + public: + using RbNode = Node; + using ReducedValue = typename Reduce::value_type; + using value_type = std::pair; + + public: + /* return #of key/vaue pairs in tree rooted at x. 
*/ + static size_t tree_size(RbNode *x) { + if (x) + return x->size(); + else + return 0; + } /*tree_size*/ + + static bool is_black(RbNode *x) { + if (x) + return x->is_black(); + else + return true; + } /*is_black*/ + + static bool is_red(RbNode *x) { + if (x) + return x->is_red(); + else + return false; + } /*is_red*/ + + /* for every node n in tree, call fn(n, d'). + * d' is the depth of the node n relative to starting point x, + * not counting red nodes. + * make calls in increasing key order (i.e. inorder traversal) + * argument d is the black-height of tree above x + * + * Require: + * - fn(x, d) + */ + template + static void inorder_node_visitor(RbNode const * x, uint32_t d, Fn && fn) { + if (x) { + /* dd: black depth of child subtrees*/ + uint32_t dd = (x->is_black() ? d + 1 : d); + + inorder_node_visitor(x->left_child(), dd, fn); + /* dd includes this node */ + fn(x, dd); + inorder_node_visitor(x->right_child(), dd, fn); + } + } /*inorder_node_visitor*/ + + /* note: RedBlackTree.clear() abuses this to visit-and-delete + * all nodes + */ + template + static void postorder_node_visitor(RbNode const * x, uint32_t d, Fn && fn) { + if (x) { + uint32_t dd = (x->is_black() ? d + 1 : d); + + postorder_node_visitor(x->left_child(), dd, fn); + postorder_node_visitor(x->right_child(), dd, fn); + /* dd includes this node */ + fn(x, dd); + } + } /*postorder_node_visitor*/ + + /* return the i'th inorder node (counting from 0) + * belonging to the subtree rooted at N. 
+ * + * behavior not defined if subtree at N contains less than + * (i + 1) nodes + */ + static RbNode * find_ith(RbNode * N, uint32_t i) { + if(!N) + return nullptr; + + RbNode * L = N->left_child(); + uint32_t n_left = tree_size(L); + + if(i < n_left) + return find_ith(L, i); + else if(i == n_left) + return N; + else if(i < N->size_) + return find_ith(N->right_child(), i - (n_left + 1)); + else + return nullptr; + } /*find_ith*/ + + /* starting from x, traverse only left children + * to find node with a nil left child. + * + * This node has the smallest key in subtree N + */ + static RbNode * find_leftmost(RbNode * N) { + while(N) { + RbNode * S = N->left_child(); + + if(!S) + break; + + N = S; + } + + return N; + } /*find_leftmost*/ + + /* return node containing the next key after N->key_ in the tree + * containing N. This will be either a descendant of N, + * or an ancestor of N. + * returns nil if x.key is the largest key in tree containing x. + */ + static RbNode * next_inorder_node(RbNode * N) { + if(!N) + return nullptr; + + if(N->right_child()) + return find_leftmost(N->right_child()); + + /* N has no right child --> + * successor is the nearest ancestor with a left child + * on path to N + */ + + RbNode * x = N; + + while(x) { + RbNode * P = x->parent(); + + if(P && P->left_child() == x) { + return P; + } + + /* path P..N traverses only right-child pointers) */ + x = P; + } + + /* no ancestor of N with a left child, so N has the largest key + * in the tree + */ + return nullptr; + } /*next_inorder_node*/ + + /* return node containing the key before N->key_ in the tree containing N. 
+ * This will be either a descendant of N, or an ancestor of N + */ + static RbNode * prev_inorder_node(RbNode * N) { + if(!N) + return nullptr; + + if(N->left_child()) + return find_rightmost(N->left_child()); + + /* N has no left child --> + * predecessor is the nearest ancestor with a right child + * on path to N + */ + + RbNode * x = N; + + while(x) { + RbNode * P = x->parent(); + + if(P && (P->right_child() == x)) { + return P; + } + + /* path P..N traverses only left-child pointers */ + x = P; + } + + /* no ancestor of N with a right child, so N has the smallest key + * in tree that containing it. + */ + return nullptr; + } /*prev_inorder_node*/ + + /* compute value of reduce applied to the set K of all keys k[j] in subtree N + * with: + * k[j] <= lub_key if is_closed = true + * k[j] < lub_key if is_closed = false + * return reduce_fn.nil() if K is empty + */ + static ReducedValue reduce_lub(Key const & lub_key, + Reduce const & reduce_fn, + bool is_closed, + RbNode * N) + { + ReducedValue retval = reduce_fn.nil(); + + for (;;) { + if (!N) + return retval; + + if ((N->key() < lub_key) || (is_closed && (N->key() == lub_key))) { + /* all keys k[i] in left subtree of N satisfy k[i] < lub_key + * apply reduce to: + * - left subtree of N + * - N->key() depending on comparison with lub_key + * - any members of right subtree of N, with key < lub_key; + */ + retval = reduce_fn.combine(retval, N->reduced1()); + N = N->right_child(); + } else { + /* all keys k[j] in right subtree of N do NOT satisfy k[j] < + * lub_key, exclude these. also exclude N->key() + */ + N = N->left_child(); + } + } + } /*reduce_lub*/ + + /* find largest key k such that + * reduce({node j in subtree(N)) | j.key <= k}) < p + * + * ^ + * 1 | xxxx + * | xx + * p |....... x + * | x + * | xx . + * | xxxx . + * 0 +----------------> + * ^ + * find_cum_glb(p) + * + * here Key is a sample value, + * Value counts #of samples with that key. 
+ * + * find_cum_glb() computes inverse for a monotonically increasing function, + * if reduce(S) = sum {j.value | j in S}; + * + * if rbtree stores values for a discrete function f: IR -> IR+, + * then x = find_sum_glb(p)->key() inverts the integral of f, i.e. + * computes: + * x + * / + * | + * sup { x: | f(z) dz < y } + * | + * / + * -oo + * + * Require: + * - Reduce behaves like sum: + * must deliver monotonically increasing values + * with increasing key-values. + * + * (for example: if Value is non-negative and Reduce is SumReduce) + */ + static RbNode * find_sum_glb(Reduce const & reduce_fn, + RbNode * N, + typename Reduce::value_type y) { + using xo::scope; + using xo::xtag; + + constexpr char const * c_self = "RbTreeUtil::find_sum_glb"; + constexpr bool c_logging_enabled = false; + scope log(XO_DEBUG(c_logging_enabled)); + + if(!N) { + log && log(c_self, ": return nullptr"); + return nullptr; + } + + typename Reduce::value_type left_sum + = RbNode::reduce_aux(reduce_fn, N->left_child()); + typename Reduce::value_type right_sum + = RbNode::reduce_aux(reduce_fn, N->right_child()); + + log && log("with", + xtag("y", y), + xtag("N.key", N->key()), + xtag("N.value", N->value()), + xtag("N.reduced1", N->reduced1()), + xtag("left_sum", left_sum), + xtag("right_sum", right_sum)); + + if (y <= left_sum) { + return find_sum_glb(reduce_fn, N->left_child(), y); + } else if (y <= N->reduced1() || !N->right_child()) { + log && log("return N"); + /* since N.reduced = reduce(left_sum, N.value, right_sum) */ + return N; + } else { + /* find bound in non-null right subtree */ + return find_sum_glb(reduce_fn, N->right_child(), y - N->reduced1()); + } + } /*find_sum_glb*/ + + /* starting from x, traverse only right children + * to find node with a nil right child + * + * This node has the largest key in subtree N + */ + static RbNode * find_rightmost(RbNode *N) { + while(N) { + RbNode *S = N->right_child(); + + if (!S) + break; + + N = S; + } + + return N; + } 
/*find_rightmost*/ + + /* find node in x with key k + * return nullptr iff no such node exists. + */ + static RbNode * find(RbNode * x, Key const & k) { + for (;;) { + if (!x) + return nullptr; + + if (k < x->key()) { + /* search in left subtree */ + x = x->left_child(); + } else if (k == x->key()) { + return x; + } else /* k > x->key() */ { + x = x->right_child(); + } + } + } /*find*/ + + /* find greatest lower bound for key k in tree x, + * provided it's tighter than candidate h. + * + * is_closed. if true, a node with key == k qualifies as a bound + * if false, only nodes with key < k qualify + * + * returns h when the subtree at x holds no qualifying node + * + * require: + * if h is provided, then x belongs to right subtree of h + * (so any key k' in x satisfies k' > h->key) + * + */ + static RbNode *find_glb_aux(RbNode *x, RbNode *h, Key const &k, + bool is_closed) { + for (;;) { + if (!x) + return h; + + if (x->key() < k) { + /* x.key is a lower bound for k */ + + if (x->right_child() == nullptr) { + /* no tighter lower bounds present in subtree rooted at x */ + + /* x must be better lower bound than h, + * since when h is non-nil we are searching right subtree of h + */ + return x; + } + + /* look for better lower bound in right child */ + h = x; + x = x->right_child(); + continue; + } else if (is_closed && (x->key() == k)) { + /* x.key is exact match */ + return x; + } else { + /* x.key is an upper bound for k. If there's a lower bound, + * it must be in left subtree of x + */ + + /* preserving h */ + x = x->left_child(); + continue; + } + } /*looping over tree nodes*/ + } /*find_glb_aux*/ + + /* find greatest lower bound node for a key, in this subtree + * returns nullptr if no node qualifies + * + * is_closed. if true, allow result with N->key = k exactly + * if false, require N->key < k + */ + static RbNode * find_glb(RbNode * x, Key const & k, bool is_closed) { + return find_glb_aux(x, nullptr, k, is_closed); + } /*find_glb*/ + +#ifdef NOT_IN_USE + /* find least upper bound node for a key, in this subtree* + * + * is_open. 
if true, allow result with N->key = k exactly + * if false, require N->key > k + */ + static RbNode *find_lub(RbNode *x, Key const &k, bool is_closed) { + if (x->key() > k) { + /* x.key is an upper bound for k */ + if (x->left_child() == nullptr) { + /* no tigher upper bound present in subtree rooted at x */ + return x; + } + + RbNode *y = find_lub(x->left_child(), k, is_closed); + + if (y) { + /* found better upper bound in left subtree */ + return y; + } else { + return x; + } + } else if (is_closed && (x->key() == k)) { + return x; + } else { + /* x.key is not an upper bound for k */ + return find_lub(x->right_child(), k, is_closed); + } + } /*find_lub*/ +#endif + + /* perform a tree rotation in direction d at node A. + * + * Require: + * - A is non-nil + * - A->child(other(d)) is non-nil + * + * if direction=D_Left: + * + * G G + * | | + * A B <- retval + * / \ / \ + * R B ==> A T + * / \ / \ + * S T R S + * + * if direction=D_Right: + * + * G G + * | | + * A B <- retval + * / \ / \ + * B R ==> T A + * / \ / \ + * T S S R + */ + static RbNode *rotate(Direction d, RbNode *A, + Reduce const & reduce_fn, + RbNode **pp_root) { + using xo::scope; + using xo::xtag; + + //constexpr char const *c_self = "RbTreeUtil::rotate"; + constexpr bool c_logging_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + Direction other_d = other(d); + + RbNode *G = A->parent(); + RbNode *B = A->child(other_d); + //RbNode *R = A->child(d); // not using + RbNode *S = B->child(d); + //RbNode *T = B->child(other_d); // not using + + if (log.enabled()) { + log("rotate-", (d == D_Left) ? 
"left" : "right", + " at", xtag("A", A), xtag("A.key", A->key()), xtag("B", B), + xtag("B.key", B->key())); + + if (G) { + log("with G", xtag("G", G), + xtag("G.key", G->key())); + // display_aux(D_Invalid /*side*/, G, 0, &lscope); + } else { + log("with A at root"); + // display_aux(D_Invalid /*side*/, A, 0, &lscope); + } + } + + /* note: this will set A's old child B to have null parent ptr */ + A->assign_child_reparent(other_d, S); + A->local_recalc_size(reduce_fn); + + B->assign_child_reparent(d, A); + B->local_recalc_size(reduce_fn); + + if (G) { + G->replace_child_reparent(A, B); + assert(B->parent() == G); + + /* note: G.size not affected by rotation */ + } else { + RbNode::replace_root_reparent(B, pp_root); + } + + return B; + } /*rotate*/ + + /* fixup size in N and all ancestors of N, + * after insert/remove affecting N + */ + static void fixup_ancestor_size(Reduce const & reduce_fn, RbNode *N) { + while (N) { + N->local_recalc_size(reduce_fn); + N = N->parent(); + } + } /*fixup_ancestor_size*/ + + /* rebalance to fix possible red-red violation at node G or G->child(d). 
+ * + * diagrams are for d=D_Left; + * mirror left-to-right to get diagram for d=D_Right + * + * G + * d-> / \ <-other_d + * P U + * / \ + * R S + * + * relative to prevailing black-height h: + * - P at h + * - U at h + * - may have red-red violation between G and P + * + * Require: + * - tree is in RB-shape, except for possible red-red violation + * between {G,P} or {P,R|S} + * Promise: + * - tree is in RB-shape + */ + static void fixup_red_shape(Direction d, RbNode *G, + Reduce const & reduce_fn, + RbNode **pp_root) { + using xo::scope; + using xo::xtag; + using xo::print::ccs; + + //constexpr char const *c_self = "RbTreeUtil::fixup_red_shape"; + constexpr bool c_logging_enabled = false; + constexpr bool c_excessive_verify_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + RbNode *P = G->child(d); + + for (uint32_t iter = 0;; ++iter) { + if (c_excessive_verify_enabled) + RbTreeUtil::verify_subtree_ok(reduce_fn, G, nullptr /*&black_height*/); + + if (log.enabled()) { + if (G) { + log("consider node G with d-child P", + xtag("iter", iter), xtag("G", G), + xtag("G.col", ccs((G->color() == C_Red) ? "r" : "B")), + xtag("G.key", G->key()), + xtag("d", ccs((d == D_Left) ? "L" : "R")), + xtag("P", P), + xtag("P.col", ccs((P->color() == C_Red) ? "r" : "B")), + xtag("P.key", P->key())); + } else { + log("consider root P", xtag("iter", iter), + xtag("P", P), + xtag("P.col", ccs((P->color() == C_Red) ? "r" : "B")), + xtag("P.key", P->key())); + } + + RbTreeUtil::display_aux(D_Invalid /*side*/, G ? G : P, 0 /*d*/, + &log); + } /*if logging enabled*/ + + if (G && G->is_red_violation()) { + log && log("red-red violation at G - defer"); + + /* need to fix red-red violation at next level up + * + * . 
(=G') + * | (=d') + * G* (=P') + * d-> / \ <-other-d + * P* U + * / \ + * R S + */ + P = G; + G = G->parent(); + d = RbNode::child_direction(G, P); + + continue; + } + + log && log("check for red violation at P"); + + if (!P->is_red_violation()) { + log && log("red-shape ok at {G,P}"); + + /* RB-shape restored */ + return; + } + + if (!G) { + log && log("make P black to fix red-shape at root"); + + /* special case: P is root of tree. + * can fix red violation by making P black + */ + P->assign_color(C_Black); + return; + } + + Direction other_d = other(d); + + RbNode *R = P->child(d); + RbNode *S = P->child(other_d); + RbNode *U = G->child(other_d); + + if (log.enabled()) { + log("got R,S,U", xtag("R", R), xtag("S", S), + xtag("U", U)); + if (R) { + log("with", + xtag("R.col", ccs(R->color_ == C_Black ? "B" : "r")), + xtag("R.key", R->key())); + } + if (S) { + log("with", + xtag("S.col", ccs(S->color_ == C_Black ? "B" : "r")), + xtag("S.key", S->key())); + } + if (U) { + log("with", + xtag("U.col", ccs(U->color_ == C_Black ? "B" : "r")), + xtag("U.key", U->key())); + } + } + + assert(is_black(G)); + assert(is_red(P)); + assert(is_red(R) || is_red(S)); + + if (RbNode::is_red(U)) { + /* if d=D_Left: + * + * *=red node + * + * . . (=G') + * | | (=d') + * G G* (=P') + * d-> / \ / \ + * P* U* ==> P U + * / \ / \ + * (*)R S(*) (*)R S(*) + * + * (*) exactly one of R or S is red (since we have a red-violation + * at P) + * + * Note: this transformation preserves #of black nodes along path + * from root to each of {T, R, S}, so it preserves the "equal + * black-node path" property + */ + G->assign_color(C_Red); + P->assign_color(C_Black); + U->assign_color(C_Black); + + log && log("fixed red violation at P, retry 1 level higher"); + + /* still need to check for red-violation at G's parent */ + P = G; + G = G->parent(); + d = RbNode::child_direction(G, P); + + continue; + } + + assert(RbNode::is_black(U)); + + if (RbNode::is_red(S)) { + log && log("rotate-", (d == D_Left) ? 
"left" : "right", + " at P", xtag("P", P), xtag("P.key", P->key()), + xtag("S", S), xtag("S.key", S->key())); + + /* preparatory step: rotate P in d direction if "inner child" + * (S) is red inner-child = right-child of left-parent or vice + * versa + * + * G G + * / \ / \ + * P* U ==> (P'=) S* U + * / \ / \ + * R S* (R'=) P* + * / \ + * R + */ + RbTreeUtil::rotate(d, P, reduce_fn, pp_root); + + if (c_excessive_verify_enabled) + RbTreeUtil::verify_subtree_ok(reduce_fn, S, nullptr /*&black_height*/); + + /* (relabel S->P etc. for merged control flow below) */ + R = P; + P = S; + } + + /* + * G P + * / \ / \ + * P* U ==> R* G* + * / \ / \ + * R* S S U + * + * ok since every path that went through previously-black G + * now goes through newly-black P + */ + P->assign_color(C_Black); + G->assign_color(C_Red); + + log && log("rotate-", + (other_d == D_Left) ? "left" : "right", " at G", + xtag("G", G), xtag("G.key", G->key())); + + RbTreeUtil::rotate(other_d, G, reduce_fn, pp_root); + + if (c_excessive_verify_enabled) { + RbNode *GG = G ? G->parent() : G; + if (!GG) + GG = P; + + if (log.enabled()) { + log("verify subtree at GG", xtag("GG", GG), + xtag("GG.key", GG->key())); + + RbTreeUtil::verify_subtree_ok(reduce_fn, GG, nullptr /*&black_height*/); + RbTreeUtil::display_aux(D_Invalid, GG, 0 /*depth*/, &log); + + log("fixup complete"); + } + } + + return; + } /*walk toward root until red violation fixed*/ + } /*fixup_red_shape*/ + + /* insert key-value pair (key, value) into *pp_root. + * on exit *pp_root contains new tree with (key, value) inserted. + * returns true if node was inserted, false if instead an existing node + * with the same key was replaced. + * + * Require: + * - pp_root is non-nil (*pp_root may be nullptr -> empty tree) + * - *pp_root is in RB-shape + * + * allow_replace_flag. if true, v will replace an existing value + * associated with key k. + * if false, preserve existing value. + * when k already exists in *pp_root. 
+ * + * return pair with: + * - f=true for new node (k did not exist in tree before this call) + * - f=false for existing node (k already in tree before this call) + * - n=node containing key k + */ + static std::pair + insert_aux(value_type const & kv_pair, + bool allow_replace_flag, + Reduce const & reduce_fn, + RbNode ** pp_root) + { + using xo::xtag; + + //XO_SCOPE2(log, true /*debug_flag*/); + + RbNode * N = *pp_root; + + Direction d = D_Invalid; + + while (N) { + if (kv_pair.first == N->key()) { + if(allow_replace_flag) { + /* match on this key already present in tree + * -> just update assoc'd value + */ + N->contents_.second = kv_pair.second; + } + + /* after modifying a node n, must recalculate reductions + * along path [root .. n] + */ + RbTreeUtil::fixup_ancestor_size(reduce_fn, N); + + //log && log(xtag("path", (char const *)"A")); + + /* since we didn't change the set of nodes, + * tree is still in RB-shape, don't need to call fixup_red_shape() + */ + return std::make_pair(false, N); + } + + d = ((kv_pair.first < N->key()) ? D_Left : D_Right); + + /* insert into left subtree somewhere */ + RbNode *C = N->child(d); + + if (!C) + break; + + N = C; + } + + /* invariant: N->child(d) is nil */ + + if (N) { + RbNode * new_node = RbNode::make_leaf(kv_pair, + reduce_fn.leaf(kv_pair.second)); + + N->assign_child_reparent(d, new_node); + + assert(is_red(N->child(d))); + + /* recalculate Node sizes on path [root .. 
N] */ + RbTreeUtil::fixup_ancestor_size(reduce_fn, N); + /* after adding a node, must rebalance to restore RB-shape */ + RbTreeUtil::fixup_red_shape(d, N, reduce_fn, pp_root); + + //log && log(xtag("path", (char const *)"B")); + + /* note: new_node=N.child(d) is true before call to fixup_red_shape(), + * but not necessarily after + */ + return std::make_pair(true, new_node); + } else { + *pp_root = RbNode::make_leaf(kv_pair, + reduce_fn.leaf(kv_pair.second)); + + /* tree with a single node might as well be black */ + (*pp_root)->assign_color(C_Black); + + //(*pp_root)->local_recalc_size(reduce_fn); + + /* Node.size will be correct for tree, since + * new node is only node in the tree + */ + + //log && log(xtag("path", (char const *)"C")); + + return std::make_pair(true, *pp_root); + } + + } /*insert_aux*/ + + /* remove a black node N with no children. + * this will reduce black-height along path to N + * by 1, so will need to rebalance tree + * + * pp_root. pointer to location of tree root; + * may update with new root + * + * Require: + * - N != nullptr + * - N has no child nodes + * - N->parent() != nullptr + */ + static void remove_black_leaf(RbNode *N, + Reduce const & reduce_fn, + RbNode **pp_root) + { + using xo::scope; + using xo::xtag; + using xo::print::ccs; + + //constexpr char const *c_self = "RbTreeUtil::remove_black_leaf"; + constexpr bool c_logging_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + assert(pp_root); + + RbNode *P = N->parent(); + + if (!P) { + /* N was the root node, tree now empty */ + *pp_root = nullptr; + delete N; + return; + } + + /* d: direction in P to immediate child N; + * also sets N.parent to nil + */ + Direction d = P->replace_child_reparent(N, nullptr); + + delete N; + + /* need to delay this assignment until + * we've determined d + */ + N = nullptr; + + /* fixup sizes on path root..P + * subsequent rebalancing rotations will preserve correct .size values + */ + RbTreeUtil::fixup_ancestor_size(reduce_fn, P); 
+ + /* other_d, S, C, D will be assigned by loop below + * + * diagram shown with d=D_Left; mirror left-to-right for d=D_Right + * + * P + * d-> / \ <-other_d + * N S + * / \ + * C D + */ + Direction other_d; + RbNode *S = nullptr; + RbNode *C = nullptr; + RbNode *D = nullptr; + + /* table of outcomes as a function of node color + * + * .=black + * *=red + * x=don't care + * + * #=#of combinations (/16) for P,S,C,D color explained by this row + * + * P S C D case # + * ----------------------- + * . . . . Case(1) 1 + * x * x x Case(3) 8 P,C,D black is forced by RB rules + * * . . . Case(4) 1 + * x . * . Case(5) 2 + * x . x * Case(6) 4 + * -- + * 16 + * + */ + + while (true) { + assert(is_black(N)); /* reminder: nil is black too */ + + /* Invariant: + * - either: + * - N is nil (first iteration only), and + * P->child(d) = nil, or: + * - P is nil and non-nil N is tree root, or: + * - N is an immediate child of P, + * and P->child(d) = N + * - N is black + * - all paths that don't go thru N have prevailing black-height h. + * - paths through N have black-height h-1 + */ + + if (!P) { + /* N is the root node, in which case all paths go through N, + * so black-height is h-1 + */ + *pp_root = N; + return; + } + + other_d = other(d); + S = P->child(other_d); + + /* S can't be nil: since N is non-nil and black, + * it must have a non-nil sibling + */ + assert(S); + + C = S->child(d); + D = S->child(other_d); + + if (log.enabled()) { + log("rebalance at parent P of curtailed subtree N", + xtag("P", P), + xtag("P.col", ccs(P->color() == C_Black ? "B" : "r")), + xtag("P.key", P->key())); + log("with sibling S, nephews C,D", xtag("S", S), + xtag("S.col", ccs(S->color() == C_Black ? 
"B" : "r")), + xtag("C", C), xtag("D", D)); + } + + if (is_black(P) && is_black(S) && is_black(C) && is_black(D)) { + /* Case(1) */ + + log && log("P,S,C,D all black: mark S red + go up 1 level"); + + /* diagram with d=D_Left: flip left-to-right for d=D_Right + * =black + * *=red + * _=red or black + * + * P + * / \ + * N S + * / \ + * C D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h + * - D at h + */ + + S->assign_color(C_Red); + + /* now have: + * + * G (=P') + * | + * P (=N') + * / \ + * N S* + * / \ + * C D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h-1 + * - D at h-1 + * + * relabel to one level higher in tree + */ + N = P; + P = P->parent(); + d = RbNode::child_direction(P, N); + + continue; + } else { + break; + } + } /*loop looking for a red node*/ + + if (is_red(S)) { + /* Case(3) */ + + if (log.enabled()) { + log("case 3: S red, P,C,D black -> rotate at P to promote S"); + log("case 3: + make P red instead of S"); + log("case 3: with", + xtag("P", P), + xtag("P.col", ccs(P->color() == C_Black ? "B" : "r")), + xtag("P.key", P->key()), xtag("S", S), + xtag("S.col", ccs(S->color() == C_Black ? 
"B" : "r")), + xtag("S.key", S->key())); + } + + /* since S is red, {P,C,D} are all black + * + * diagram with d=D_Left: flip left-to-right for d=D_Right + * =black + * *=red + * _=red or black + * + * P + * / \ + * N S* + * / \ + * C D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h + * - D at h + */ + + assert(is_black(C)); + assert(is_black(D)); + assert(is_black(P)); + assert(is_black(N)); + + RbTreeUtil::rotate(d, P, reduce_fn, pp_root); + + /* after rotation d at P: + * + * S* + * / \ + * P D + * / \ + * N C + * + * relative to prevailing black-height h: + * - N at h-1 (now goes thru red S) + * - C at H (still goes through black P, red S) + * - D at h-1 (no longer goes thru black P) + */ + + P->assign_color(C_Red); + S->assign_color(C_Black); + + /* after reversing colors of {P,S}: + * + * S + * / \ + * P* D + * / \ + * N C (=S') + * + * relative to prevailing black-height h: + * - N at h-1 (now thru black S, red P instead of red S, black P) + * - C at h (now thru black S, red P instead of red S, black P) + * - D at h (now through black S instead of red S, black P) + */ + + /* now relabel for subsequent cases */ + S = C; + C = S ? S->child(d) : nullptr; + D = S ? S->child(other_d) : nullptr; + } + + assert(is_black(S)); + + if (is_red(P) && is_black(C) && is_black(D)) { + /* Case(4) */ + + if (log.enabled()) { + log("case 4: P red, N,S,C,D black -> recolor and finish"); + log("case 4: with", + xtag("P", P), + xtag("P.col", ccs(P->color() == C_Black ? "B" : "r")), + xtag("P.key", P->key()), xtag("S", S), + xtag("S.col", ccs(S->color() == C_Black ? 
"B" : "r")), + xtag("S.key", S->key())); + } + + assert(is_black(N)); + + /* diagram with d=D_Left: flip left-to-right for d=D_Right* + * =black + * *=red + * _=red or black + * + * P* + * / \ + * N S + * / \ + * C D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h + * - D at h + */ + + P->assign_color(C_Black); + S->assign_color(C_Red); + + /* after making P black, and S red (swapping colors of P,S): + * + * P + * / \ + * N S* + * / \ + * C D + * + * relative to prevailing black-height h: + * - N at h + * - C at h + * - D at h + * + * and RB-shape is restored + */ + return; + } + + assert(is_black(S) && (is_black(P) || is_red(C) || is_red(D))); + + if (is_red(C) && is_black(D)) { + log && log("case 5: C red, S,D black -> rotate at S"); + + /* diagram with d=D_Left; flip left-to-right for d=D_Right + * + * =black + * *=red + * _=red or black + * + * P_ + * / \ + * N S + * / \ + * C* D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h + * - D at h + */ + + RbTreeUtil::rotate(other_d, S, reduce_fn, pp_root); + + assert(P->child(other_d) == C); + + /* after other(d) rotation at S: + * + * P_ + * / \ + * N C* + * \ + * S + * \ + * D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h-1 (no longer goes thru black S) + * - S at h (now goes thru red C) + * - D at h (now goes thru red C) + */ + + C->assign_color(C_Black); + S->assign_color(C_Red); + + /* after exchanging colors of C,S: + * + * P_ + * / \ + * N C (=S') + * \ + * S* (=D') + * \ + * D + * + * relative to prevailing black-height h: + * - N at h-1 + * - C at h (no longer goes thru black S, but now C black) + * - S at h (no longer red, but now goes thru black C) + * - D at h (now goes thru black C, red S instead of black S) + */ + + /* now relabel to match next and final case */ + D = S; + S = C; + C = nullptr; /* won't be using C past this point */ + + assert(D); + assert(D->is_red()); + + /* fall through to next case */ + } + + if 
(is_red(D)) { + log && log("case 6: S black, D red -> rotate at P and finish"); + + /* diagram with d=D_Left; flip left-to-right for d=D_Right + * + * Sibling is black, and distant child is red + * + * if N=P->left_child(): + * + * *=red + * _=red or black + * + * P_ + * / \ + * N S + * / \ + * C_ D* + * + * relative to prevailing black-height h: + * - N at h-1 + * - S (+also C,D) at h + */ + + RbTreeUtil::rotate(d, P, reduce_fn, pp_root); + + /* after rotate at P toward d: * + * + * S + * / \ + * P_ D* + * / \ + * N C_ + * + * Now, relative to prevailing black-height h: + * - N at h+1 (paths to N now visit black S) + * - C at h (paths to C still visit P,S) + * - D at: h if P red, + * h-1 if P black + * (paths to D now skip P) + */ + + S->assign_color(P->color()); + P->assign_color(C_Black); + D->assign_color(C_Black); + + /* after recolor: S to old P color, P to black, D to black. + * + * S_ + * / \ + * P D + * / \ + * N C_ + * + * Now, relative to prevailing black-height h: + * - N at h+1 (swapped P, S colors) + * - C at h (paths to C still visit P,S, swapped P,S colors) + * - D at: h if S red (was P red, S black, D red; now S red, D + * black) h if S black (was P black, S black, D red; now S + * black, D black) + * + * RB-shape has been restored + */ + return; + } + } /*remove_black_leaf*/ + + /* remove node with key k from tree rooted at *pp_root. + * on exit *pp_root contains new tree root. + * + * Require: + * - pp_root is non-null. (*pp_root can be null -> tree is empty) + * - *pp_root is in RB-shape + * + * return true if a node was removed; false otherwise. 
+ */ + static bool erase_aux(Key const &k, + Reduce const & reduce_fn, + RbNode **pp_root) { + using xo::scope; + using xo::xtag; + + //constexpr char const *c_self = "RbTreeUtil::erase_aux"; + constexpr bool c_logging_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + RbNode *N = *pp_root; + + log && log("enter", xtag("N", N)); + + /* + * here the triangle ascii art indicates a tree structure, + * of arbitrary size + * + * o <- this + * / \ + * o-N-o + * / \ + * X + * / \ + * o---R + */ + + N = RbTreeUtil::find_glb(N, k, true /*is_closed*/); + + if (!N || (N->key() != k)) { + /* no node with .key = k present, so cannot remove it */ + return false; + } + + if (c_logging_enabled) + log && log("got lower bound", xtag("N", N), + xtag("N.key", N->key())); + + /* first step is to simplify problem so that we're removing + * a node with 0 or 1 children. + */ + + RbNode *X = N->left_child(); + + if (X == nullptr) { + /* N has 0 or 1 children */ + ; + } else { + /* R will be 'replacement node' for N */ + RbNode *R = RbTreeUtil::find_rightmost(X); + + /* R->right_child() is nil by definition + * + * copy R's (key + value) into N; + * N now serves as container for information previously + * represented by R. + */ + + N->contents_ = R->contents_; + /* (preserving N->parent_, N->child_v_[]) */ + + /* now relabel N as new R (R'), + * and relabel R as new N (N'). + * Then go to work on reduced problem of deleting N'. + * Problem is redueced since now N' has 0 or 1 child. + * + * (Doesn't matter that N' contains key,values of R, + * since we're going to delete it anyway) + */ + N = R; + /* (preserving R->parent_, R->child_v_[]) */ + + /* o + * / \ + * o-R'o + * / + * X + * / \ + * o---N' + */ + } + + RbNode *P = N->parent(); + + /* N has 0 or 1 children + * + * Implications: + * 1. if N is red, it cannot have red children (by RB rules), + * and it cannot have just 1 black child. 
+ * Therefore red N must have 0 children + * -> can delete N without disturbing RB properties + * 2. if N is black: + * 2.1 if N has 1 child S, then S must be red + * (if S were black, that would require N to have a 2nd child + * to preserve equal black-height for all paths) + * -> replace N with S, repainting S black, in place of + * to-be-reclaimed N + * 1.2 if N is black with 0 children, need to rebalance + */ + + if (N->is_red()) { + if (N->is_leaf()) { + /* replace pointer to N with nil in N's parent. */ + + if (P) { + P->replace_child_reparent(N, nullptr); + RbTreeUtil::fixup_ancestor_size(reduce_fn, P); + } else { + /* N was sole root node; tree will be empty after removing it */ + *pp_root = nullptr; + } + + if (c_logging_enabled) + log && log("delete node", xtag("addr", N)); + delete N; + } else { + assert(false); + + /* control can't come here for RB-tree, + * because a red node can't have red children, or just one black + * child. + */ + } + } else /*N->is_black()*/ { + RbNode *R = N->left_child(); + + if (!R) + R = N->right_child(); + + if (R) { + /* if a black node has one child, that child cannot be black */ + assert(R->is_red()); + + /* replace N with R in N's parent, + * + make R black to preserve black-height + */ + R->assign_color(C_Black); + + if (P) { + P->replace_child_reparent(N, R); + RbTreeUtil::fixup_ancestor_size(reduce_fn, P); + } else { + /* N was root node */ + RbNode::replace_root_reparent(R, pp_root); + } + + if (c_logging_enabled) + log && log("delete node", xtag("addr", N)); + delete N; + } else { + /* N is black with no children, + * may need rebalance here + */ + + if (P) { + RbTreeUtil::remove_black_leaf(N, reduce_fn, pp_root); + } else { + /* N was root node */ + *pp_root = nullptr; + + log && log("delete node", xtag("addr", N)); + delete N; + } + } + } + + return true; + } /*erase_aux*/ + + /* verify that subtree at N is in RB-shape. + * will cover subset of RedBlackTree class invariants: + * + * RB2. 
if N = P->child(d), then N->parent()=P + * RB3. all paths to leaves have the same black height + * RB4. no red node has a red parent + * RB5. inorder traversal visits keys in monotonically increasing order + * RB6. Node::size reports the size of the subtree reachable from that node + * via child pointers + * RB7. Node::reduced reports the value of + * f(f(L, Node::value), R) + * where: L is reduced-value for left child, + * R is reduced-value for right child + * + * returns the #of nodes in subtree rooted at N. + */ + static size_t verify_subtree_ok(Reduce const & reduce_fn, + RbNode const * N, + int32_t * p_black_height) + { + using xo::scope; + using xo::xtag; + using xo::print::ccs; + + constexpr char const *c_self = "RbTreeUtil::verify_subtree_ok"; + + // scope lscope(c_self); + + /* counts #of nodes in subtree rooted at N */ + size_t i_node = 0; + Key const *last_key = nullptr; + /* inorder node index when establishing black_height */ + size_t i_black_height = 0; + /* establish on first leaf node encountered */ + uint32_t black_height = 0; + + auto verify_fn = [c_self, + &reduce_fn, + &i_node, + &last_key, + &i_black_height, + &black_height] (RbNode const *x, + uint32_t bd) + { + /* RB2. if c=x->child(d), then c->parent()=x */ + + if (x->left_child()) { + XO_EXPECT(x == x->left_child()->parent(), + tostr(c_self, (": expect symmetric child/parent pointers"), + xtag("i", i_node), xtag("node[i]", x), + xtag("key[i]", x->key()), + xtag("child", x->left_child()), + xtag("child.key", x->left_child()->key()), + xtag("child.parent", x->left_child()->parent_))); + } + + if (x->right_child()) { + XO_EXPECT(x == x->right_child()->parent(), + tostr(c_self, ": expect symmetric child/parent pointers", + xtag("i", i_node), + xtag("node[i]", x), + xtag("key[i]", x->key()), + xtag("child", x->right_child()), + xtag("child.key", x->right_child()->key()), + xtag("child.parent", x->right_child()->parent_))); + } + + /* RB3. 
all nodes have the same black-height */ + + if (x->is_leaf()) { + if (black_height == 0) { + black_height = bd; + } else { + XO_EXPECT(black_height == bd, + tostr(c_self, + ": expect all RB-tree nodes to have the same " + "black-height", + xtag("i1", i_black_height), xtag("i2", i_node), + xtag("blackheight(i1)", black_height), + xtag("blackheight(i2)", bd))); + } + } + + /* RB4. a red node may not have a red parent + * (conversely, a red node may not have a red child) + */ + + RbNode *red_child = + ((x->left_child() && x->left_child()->is_red()) + ? x->left_child() + : ((x->right_child() && x->right_child()->is_red()) + ? x->right_child() + : nullptr)); + + XO_EXPECT( + x->is_red_violation() == false, + tostr(c_self, + ccs(": expect RB-shape tree to have no red violations but " + "red y is child of red x"), + xtag("i", i_node), xtag("x.addr", x), + xtag("x.col", ccs((x->color_ == C_Black) ? "B" : "r")), + xtag("x.key", x->key()), + xtag("y.addr", red_child), + xtag("y.col", ccs((red_child->color_ == C_Black) ? "B" : "r")), + xtag("y.key", red_child->key()))); + + /* RB5. inorder traversal visits nodes in strictly increasing key order */ + + if (last_key) { + XO_EXPECT((*last_key) < x->key(), + tostr(c_self, + ": expect inorder traversal to visit keys" + " in strictly increasing order", + xtag("i", i_node), xtag("key[i-1]", *last_key), + xtag("key[i]", x->key()))); + } + + last_key = &(x->key()); + + /* RB6. Node::size reports the size of the subtree reachable from that + * node by child pointers. + */ + XO_EXPECT(x->size() == (tree_size(x->left_child()) + + 1 + + tree_size(x->right_child())), + tostr(c_self, + ": expect Node::size to be 1 + sum of childrens' size", + xtag("i", i_node), + xtag("key[i]", x->key()), + xtag("left.size", tree_size(x->left_child())), + xtag("right.size", tree_size(x->right_child())))); + + /* RB7. 
Node::reduced reports the value of + * f(f(L, Node::value), R) + * where: L is reduced-value for left child, + * R is reduced-value for right child + */ + auto reduced_pair + = RbNode::reduced_pair(reduce_fn, x); + + XO_EXPECT(reduce_fn.is_equal + (x->reduced1(), reduced_pair.first), + tostr(c_self, + ": expect Node::reduced to be reduce_fn" + " applied to (.L, .value)", + xtag("node.reduced1", x->reduced1()), + xtag("reduced_pair.first", reduced_pair.first))); + + XO_EXPECT(reduce_fn.is_equal + (x->reduced2(), reduced_pair.second), + tostr(c_self, + ": expect Node::reduced to be reduce_fn" + " applied to (.L, .value, .R)", + xtag("node.reduced2", x->reduced2()), + xtag("reduce2_expr", reduced_pair.second))); + + ++i_node; + }; + + RbTreeUtil::inorder_node_visitor(N, 0 /*d*/, verify_fn); + + if (p_black_height) + *p_black_height = black_height; + + return i_node; + } /*verify_subtree_ok*/ + + /* display tree structure, 1 line per node. + * indent by node depth + d + */ + static void display_aux(Direction side, RbNode const *N, uint32_t d, + xo::scope *p_scope) { + using xo::pad; + using xo::xtag; + using xo::print::ccs; + + if (N) { + p_scope->log(pad(d), + xtag("addr", N), + xtag("par", N->parent()), + xtag("side", ccs((side == D_Left) ? "L" + : (side == D_Right) ? "R" + : "root")), + xtag("col", ccs(N->is_black() ? "B" : "r")), + xtag("key", N->key()), + xtag("value", N->value()), + xtag("wt", N->size()), + xtag("reduced1", N->reduced1()), + xtag("reduced2", N->reduced2())); + display_aux(D_Left, N->left_child(), d + 1, p_scope); + display_aux(D_Right, N->right_child(), d + 1, p_scope); + } + } /*display_aux*/ + + static void display(RbNode const *N, uint32_t d) { + using xo::scope; + + scope log(XO_DEBUG(true /*debug_flag*/)); + + display_aux(D_Invalid, N, d, &log); + } /*display*/ + }; /*RbTreeUtil*/ + + /* xo::tree::detail::RedBlackTreeLhsBase + * + * use for const version of RedBlackTree::operator[]. 
+ * + * Require: RbNode is either + * RedBlackTree::RbNode + * or + * RedBlackTree::RbNode const + */ + template + class RedBlackTreeLhsBase { + public: + using mapped_type = typename RedBlackTree::mapped_type; + using RbUtil = typename RedBlackTree::RbUtil; + + public: + RedBlackTreeLhsBase() = default; + RedBlackTreeLhsBase(RedBlackTree * tree, RbNode * node) + : p_tree_(tree), node_(node) + {} + + operator mapped_type const & () const { + using xo::tostr; + + if (!this->node_) { + throw std::runtime_error + (tostr("rbtree: attempt to use empty lhs object as rvalue")); + } + + return this->node_->contents().second; + } /*operator value_type const &*/ + + protected: + RedBlackTree * p_tree_ = nullptr; + /* invariant: if non-nil, .node belongs to .*p_tree */ + RbNode * node_ = nullptr; + }; /*RedBlackTreeLhsBase*/ + + template + class RedBlackTreeConstLhs : public RedBlackTreeLhsBase + { + public: + RedBlackTreeConstLhs() = default; + RedBlackTreeConstLhs(RedBlackTree const * tree, + typename RedBlackTree::RbNode const * node) + : RedBlackTreeLhsBase(tree, node) {} + }; /*RedBlackTreeConstLhs*/ + + /* xo::tree::detail::RedBlackTreeLhs + * + * use for RedBlackTree::operator[]. + * can't return a regular lvalue, + * because assignment within a Node N invalidates partial sums along + * the path from tree root to N. + * + * instead interpolate instance of this class, that can intercept + * asasignments. 
+ */ + template + class RedBlackTreeLhs : public RedBlackTreeLhsBase + { + public: + using value_type = typename RedBlackTree::value_type; + using key_type = typename RedBlackTree::key_type; + using mapped_type = typename RedBlackTree::mapped_type; + using RbUtil = typename RedBlackTree::RbUtil; + using RbNode = typename RedBlackTree::RbNode; + + public: + RedBlackTreeLhs() = default; + RedBlackTreeLhs(RedBlackTree * tree, typename RedBlackTree::RbNode * node, key_type key) + : RedBlackTreeLhsBase(tree, node), key_(key) {} + + RedBlackTreeLhs & operator=(mapped_type const & v) { + using xo::tostr; + + if(this->p_tree_) { + if(this->node_) { + this->node_->contents().second = v; + + /* after modifying a node n, + * must recalculate reductions along path [root .. n] + */ + RbUtil::fixup_ancestor_size(this->p_tree_->reduce_fn(), + this->node_); + } else { + /* insert (key, v) pair into this tree */ + this->p_tree_->insert(value_type(this->key_, v)); + } + } else { + assert(false); + + throw std::runtime_error + (tostr("rbtree: attempt to apply operator= thru empty lhs object")); + } + + return *this; + } /*operator=*/ + + RedBlackTreeLhs & operator+=(mapped_type const & v) { + using xo::tostr; + + if(this->p_tree_) { + if(this->node_) { + this->node_->contents().second += v; + + /* after modifying value at node n, + * must recalculate order statistics along path [root .. 
n] + */ + RbUtil::fixup_ancestor_size(this->p_tree_->reduce_fn(), + this->node_); + } else { + /* for form's sake, in case value_type is something unusual */ + mapped_type v2; + v2 += v; + + /* insert (key, v) pair into this tree */ + this->p_tree_->insert(value_type(this->key_, v2)); + } + } else { + assert(false); + + throw std::runtime_error + (tostr("rbtree: attempt to apply operator+= through empty lhs object")); + } + + return *this; + } /*operator+=*/ + + /* TODO: + * - operator-=() + * - operator*=() + * - operator/=() + */ + + private: + /* capture key k used in expression tree[k] + * Invariant: + * - if .node is non-null, then .node.key = key + */ + key_type key_; + }; /*RedBlackTreeLhs*/ + + /* tragically, we can't partially specialize an alias template. + * however we /can/ partially specialize a struct that nests a typealias. + */ + template + struct NodeTypeTraits { using NodeType = void; }; + + template + struct NodeTypeTraits { + using NativeNodeType = Node; + using NodeType = NativeNodeType; + using ContentsType = typename NodeType::ContentsType; + using NodePtrType = NodeType *; + }; + + template + struct NodeTypeTraits { + using NativeNodeType = Node; + using NodeType = NativeNodeType const; + using ContentsType = typename NodeType::ContentsType const; + using NodePtrType = NodeType const *; + }; + + /* xo::tree::detail::IteratorBase + * + * shared between const & and non-const red-black-tree iterators. 
+ * + * editor bait: BaseIterator + */ + template + class IteratorBase { + public: + using RbUtil = RbTreeUtil; + using RbNode = Node; + using Traits = NodeTypeTraits; + using ReducedValue = typename Reduce::value_type; + using RbNativeNodeType = typename Traits::NativeNodeType; + using RbNodePtrType = typename Traits::NodePtrType; + using RbContentsType = typename Traits::ContentsType; + + protected: + IteratorBase() = default; + IteratorBase(IteratorDirection dirn, IteratorLocation loc, RbNodePtrType node) + : dirn_{dirn}, location_{loc}, node_{node} {} + IteratorBase(IteratorBase const & x) = default; + + static IteratorBase prebegin_aux(RbNodePtrType node) { + return IteratorBase(ID_Forward, IL_BeforeBegin, node); + } /*prebegin_aux*/ + + static IteratorBase begin_aux(RbNodePtrType node) { + return IteratorBase(ID_Forward, node ? IL_Regular : IL_AfterEnd, node); + } /*begin_aux*/ + + static IteratorBase end_aux(RbNodePtrType node) { + return IteratorBase(ID_Forward, IL_AfterEnd, node); + } /*end_aux*/ + + static IteratorBase rprebegin_aux(RbNodePtrType node) { + return IteratorBase(ID_Reverse, IL_AfterEnd, node); + } /*rprebegin_aux*/ + + static IteratorBase rbegin_aux(RbNodePtrType node) { + return IteratorBase(ID_Reverse, + (node ? IL_Regular : IL_BeforeBegin), + node); + } /*rbegin_aux*/ + + static IteratorBase rend_aux(RbNodePtrType node) { + return IteratorBase(ID_Reverse, + IL_BeforeBegin, + node); + } /*rend_aux*/ + + public: + IteratorLocation location() const { return location_; } + RbNodePtrType node() const { return node_; } + + ReducedValue const & reduced() const { return node_->reduced(); } + + RbContentsType & operator*() const { + this->check_regular(); + return this->node_->contents(); + } /*operator**/ + + RbContentsType * operator->() const { + return &(this->operator*()); + } + + /* true for "just before beginning" and "just after the end" states. 
+ * false otherwise + */ + bool is_sentinel() const { return (this->location_ != IL_Regular); } + /* true unless iterator is in a sentinel state */ + bool is_dereferenceable() const { return !this->is_sentinel(); } + + /* deferenceable iterators are truth-y; + * sentinel iterators are false-y + */ + operator bool() const { return this->is_dereferenceable(); } + + bool operator==(IteratorBase const & x) const { + return (this->location_ == x.location_) && (this->node_ == x.node_); + } /*operator==*/ + + bool operator!=(IteratorBase const & x) const { + return (this->location_ != x.location_) || (this->node_ != x.node_); + } /*operator!=*/ + + void print(std::ostream & os) const { + using xo::xtag; + + os << ""; + } /*print*/ + + /* pre-increment */ + IteratorBase & operator++() { + return ((this->dirn_ == ID_Forward) + ? this->next_step() + : this->prev_step()); + } /*operator++*/ + + /* pre-decrement */ + IteratorBase & operator--() { + return ((this->dirn_ == ID_Forward) + ? this->prev_step() + : this->next_step()); + } /*operator--*/ + + protected: + void check_regular() const { + using xo::tostr; + + if(this->location_ != IL_Regular) + throw std::runtime_error(tostr("rbtree iterator: cannot deref iterator" + " in non-regular state")); + } /*check_regular*/ + + private: + IteratorBase & next_step() { + switch(this->location_) { + case IL_BeforeBegin: + /* .node is first node in tree */ + this->location_ = IL_Regular; + break; + case IL_Regular: + { + RbNodePtrType next_node + = RbUtil::next_inorder_node(const_cast(this->node_)); + + if(next_node) { + this->node_ = next_node; + } else { + this->location_ = IL_AfterEnd; + } + } + break; + case IL_AfterEnd: + break; + } /*operator++*/ + + return *this; + } /*next_step*/ + + IteratorBase & prev_step() { + switch(this->location_) { + case IL_BeforeBegin: + break; + case IL_Regular: + { + RbNode * prev_node = RbUtil::prev_inorder_node(const_cast(this->node_)); + + if(prev_node) { + this->node_ = prev_node; + } else { + 
this->location_ = IL_BeforeBegin; + } + } + break; + case IL_AfterEnd: + /* .node is already last node in tree */ + this->location_ = IL_Regular; + break; + } + + return *this; + } /*prev_step*/ + + protected: + /* ID_Forward, ID_Reverse */ + IteratorDirection dirn_ = ID_Forward; + /* IL_BeforeBegin, IL_Regular, IL_AfterEnd */ + IteratorLocation location_ = IL_AfterEnd; + /* location = IL_BeforeBegin: .node is leftmost node in tree + * location = IL_Regular: .node is some node in tree, + * iterator refers to that node. + * location = IL_AfterEnd: .node is rightmost node in tree + */ + RbNodePtrType node_ = nullptr; + }; /*IteratorBase*/ + + /* xo::tree::detail::Iterator + * + * inorder iterator over nodes in a red-black tree. + * invalidated on insert or remove operations on the parent tree. + * + * satisfies the std::bidirectional_iterator concept + */ + template + class Iterator : public IteratorBase { + public: + using iterator_concept = std::bidirectional_iterator_tag; + + using RbIteratorBase = IteratorBase; + using RbNode = typename RbIteratorBase::RbNode; + using RbUtil = typename RbIteratorBase::RbUtil; + using ReducedValue = typename Reduce::value_type; + + public: + Iterator() = default; + Iterator(IteratorDirection dirn, IteratorLocation loc, RbNode * n) + : RbIteratorBase(dirn, loc, n) {} + Iterator(Iterator const & x) = default; + Iterator(RbIteratorBase const & x) : RbIteratorBase(x) {} + Iterator(RbIteratorBase && x) : RbIteratorBase(std::move(x)) {} + + static Iterator begin_aux(RbNode const * n) { return RbIteratorBase::begin_aux(n); } + static Iterator end_aux(RbNode const * n) { return RbIteratorBase::end_aux(n); } + + static Iterator rbegin_aux(RbNode const * n) { return RbIteratorBase::rbegin_aux(n); } + static Iterator rend_aux(RbNode const * n) { return RbIteratorBase::rend_aux(n); } + + /* pre-increment */ + Iterator & operator++() { + RbIteratorBase::operator++(); + return *this; + } /*operator++*/ + + /* post-increment */ + Iterator 
operator++(int) { + Iterator retval = *this; + + ++(*this); + + return retval; + } /*operator++(int)*/ + + /* pre-decrement */ + Iterator & operator--() { + RbIteratorBase::operator--(); + return *this; + } /*operator--*/ + + /* post-decrement */ + Iterator operator--(int) { + Iterator retval = *this; + + --(*this); + + return retval; + } /*operator--(int)*/ + }; /*Iterator*/ + + /* xo::tree::detail::ConstIterator + * + * inorder iterator over nodes in a red-black tree. + * invalidated on insert or remove operations on the parent tree. + * + * satisfies the std::bidirectional_iterator concept + */ + template + class ConstIterator : public IteratorBase { + public: + using iterator_concept = std::bidirectional_iterator_tag; + + using RbIteratorBase = IteratorBase; + using RbNode = typename RbIteratorBase::RbNode; + using RbUtil = typename RbIteratorBase::RbUtil; + using ReducedValue = typename Reduce::value_type; + + public: + ConstIterator() = default; + ConstIterator(IteratorDirection dirn, IteratorLocation loc, RbNode const * node) + : RbIteratorBase(dirn, loc, node) {} + ConstIterator(ConstIterator const & x) = default; + ConstIterator(RbIteratorBase const & x) : RbIteratorBase(x) {} + ConstIterator(RbIteratorBase && x) : RbIteratorBase(std::move(x)) {} + + static ConstIterator prebegin_aux(RbNode const * n) { return RbIteratorBase::prebegin_aux(n); } + static ConstIterator begin_aux(RbNode const * n) { return RbIteratorBase::begin_aux(n); } + static ConstIterator end_aux(RbNode const * n) { return RbIteratorBase::end_aux(n); } + + static ConstIterator rprebegin_aux(RbNode const * n) { return RbIteratorBase::rprebegin_aux(n); } + static ConstIterator rbegin_aux(RbNode const * n) { return RbIteratorBase::rbegin_aux(n); } + static ConstIterator rend_aux(RbNode const * n) { return RbIteratorBase::rend_aux(n); } + + /* pre-increment */ + ConstIterator & operator++() { + RbIteratorBase::operator++(); + return *this; + } /*operator++*/ + + /* post-increment */ + 
ConstIterator operator++(int) { + ConstIterator retval = *this; + + ++(*this); + + return retval; + } /*operator++(int)*/ + + /* pre-decrement */ + ConstIterator & operator--() { + RbIteratorBase::operator--(); + return *this; + } /*operator--*/ + + /* post-decrement */ + ConstIterator operator--(int) { + ConstIterator retval = *this; + + --(*this); + + return retval; + } /*operator--(int)*/ + }; /*ConstIterator*/ + } /*namespace detail*/ + + struct null_reduce_value {}; + + /* for null reduce, just have it return empty struct; + * otherwise breaks verification (e.g. verify_subtree_ok() below) + */ + template + struct NullReduce { + static constexpr bool is_null_reduce() { return true; } + static constexpr bool is_monotonic() { return false; } + + /* data type for reduced values */ + using value_type = null_reduce_value; + + value_type nil() const { return value_type(); } + value_type leaf(NodeValue const & /*x*/) const { + return nil(); + } + value_type operator()(value_type /*x*/, + NodeValue const & /*value*/) const { return nil(); } + value_type combine(value_type /*x*/, + value_type /*y*/) const { return nil(); } + bool is_equal(value_type /*x*/, value_type /*y*/) const { return true; } + }; /*NullReduce*/ + + inline std::ostream & operator<<(std::ostream & os, + null_reduce_value /*x*/) + { + os << "{}"; + return os; + } /*operator<<*/ + + /* just counts #of distinct values; + * redundant, same as detail::Node<>::size_. 
+ * providing for completeness' sake + */ + template + class OrdinalReduce { + public: + using value_type = std::size_t; + + public: + static constexpr bool is_monotonic() { return true; } + + value_type nil() const { return 0; } + + value_type leaf(Value const & /*x*/) const { + return 1; + } /*leaf*/ + + value_type operator()(value_type acc, + Value const & /*x*/) const { + /* counts #of values */ + return acc + 1; + } + + value_type combine(value_type x, value_type y) const { return x + y; } + bool is_equal(value_type x, value_type y) const { return x == y; } + }; /*OrdinalReduce*/ + + /* reduction for inverting the integral of a non-negative discrete function + * computes sum of values for each subtree + */ + template + struct SumReduce { + using value_type = Value; + + static constexpr bool is_monotonic() { return true; } + + value_type nil() const { return -std::numeric_limits::infinity(); } + value_type leaf(Value const & x) const { + return x; + } /*leaf*/ + + value_type operator()(value_type reduced, + Value const & x) const { + /* sums tree values */ + if(std::isfinite(reduced)) { + return reduced + x; + } else { + /* omit -oo reduced value from .nil() */ + return x; + } + } /*operator()*/ + + value_type combine(value_type const & x, + value_type const & y) const { + /* omit -oo reduced value from .nil() */ + if(!std::isfinite(x)) + return y; + if(!std::isfinite(y)) + return x; + + return x + y; + } /*combine*/ + + bool is_equal(value_type const & x, value_type const & y) const { return x == y; } + }; /*SumReduce*/ + + /* red-black tree with order statistics + */ + template + class RedBlackTree { + static_assert(ReduceConcept); + //static_assert(requires(Reduce r) { r.nil(); }, "missing .nil() method"); + + public: + using key_type = Key; + using mapped_type = Value; + using value_type = std::pair; + using ReducedValue = typename Reduce::value_type; + using RbTreeLhs = detail::RedBlackTreeLhs>; + using RbTreeConstLhs = detail::RedBlackTreeConstLhs>; + 
using RbUtil = detail::RbTreeUtil; + using RbNode = detail::Node; + using Direction = detail::Direction; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using iterator = detail::Iterator; + using const_iterator = detail::ConstIterator; + + public: + RedBlackTree() = default; + + bool empty() const { return size_ == 0; } + size_type size() const { return size_; } + size_type max_size() const { return std::numeric_limits::max(); } + Reduce const & reduce_fn() const { return reduce_fn_; } + + /* forward const iterators (canonical names) */ + + /* iterator "one before beginning" */ + const_iterator cprebegin() const { + return const_iterator::prebegin_aux(RbUtil::find_leftmost(this->root_)); + } /*cprebegin*/ + + const_iterator cbegin() const { + return const_iterator::begin_aux(RbUtil::find_leftmost(this->root_)); + } /*begin*/ + + const_iterator cend() const { + return const_iterator::end_aux(RbUtil::find_rightmost(this->root_)); + } /*end*/ + + /* forward const iterators (overloaded names) */ + + const_iterator prebegin() const { return this->cprebegin(); } + const_iterator begin() const { return this->cbegin(); } + const_iterator end() const { return this->cend(); } + + /* forward non-const iterators */ + + iterator prebegin() { + return iterator::prebegin_aux(RbUtil::find_leftmost(this->root_)); + } /*prebegin*/ + + iterator begin() { + return iterator::begin_aux(RbUtil::find_leftmost(this->root_)); + } /*begin*/ + + iterator end() { + return iterator::end_aux(RbUtil::find_rightmost(this->root_)); + } /*end*/ + + /* reverse const iterators (canonical names) */ + + /* reverse-iterator, "one after end" */ + const_iterator crprebegin() const { + return const_iterator::rprebegin_aux(RbUtil::find_rightmost(this->root_)); + } /*crprebegin*/ + + const_iterator crbegin() const { + return const_iterator::rbegin_aux(RbUtil::find_rightmost(this->root_)); + } /*crbegin*/ + + const_iterator crend() const { + return 
const_iterator::rend_aux(RbUtil::find_leftmost(this->root_)); + } /*crend*/ + + /* reverse const iterators (overloaded names) */ + + const_iterator rprebegin() const { return this->crprebegin(); } + const_iterator rbegin() const { return this->crbegin(); } + const_iterator rend() const { return this->crend(); } + + /* reverse non-const iterators */ + + iterator rprebegin() { + return iterator::rprebegin_aux(RbUtil::find_rightmost(this->root_)); + } /*rprebegin*/ + + iterator rbegin() { + return iterator::rbegin_aux(RbUtil::find_rightmost(this->root_)); + } /*rbegin*/ + + iterator rend() { + return iterator::rend_aux(RbUtil::find_leftmost(this->root_)); + } /*rend*/ + + /* smallest key: .first of the pair that .cbegin() refers to. require: + * - .size() > 0 + */ + Key const & min_key() const { return this->cbegin()->first; } + /* require: + * - .size() > 0 + */ + Key const & max_key() const { const_iterator ix = this->cend(); --ix; return ix->first; } + + /* visit tree contents in increasing key order + * + * Require: + * - Fn(std::pair const &) + */ + template + void visit_inorder(Fn && fn) { + auto visitor_fn = [&fn](RbNode const * x, uint32_t /*d*/) { fn(x->contents()); }; + + RbUtil::inorder_node_visitor(this->root_, + 0 /*depth -- will be ignored*/, + visitor_fn); + } /*visit_inorder*/ + + /* if i in [0 .. .size], return iterator referring to ith inorder node in tree + * otherwise return this->end() + */ + const_iterator find_ith(uint32_t i) const { + RbNode * node = RbUtil::find_ith(this->root_, i); + + if(node) { + return const_iterator(detail::ID_Forward, detail::IL_Regular, node); + } else { + return this->end(); + } + } /*find_ith*/ + + iterator find_ith(uint32_t i) { + RbNode * node = RbUtil::find_ith(this->root_, i); + + if(node) { + return iterator(detail::IL_Regular, node); + } else { + return this->end(); + } + } /*find_ith*/ + + /* find node with key equal to x in this tree. + * on success, return iterator ix with ix->first = x. 
+ * on failure, return this->end() + */ + const_iterator find(Key const & x) const { + RbNode * node = RbUtil::find(this->root_, x); + + if(node) { + return const_iterator(detail::ID_Forward, detail::IL_Regular, node); + } else { + return this->end(); + } + } /*find*/ + /* non-const version of .find(): same lookup, but yields a mutable iterator (constructed as in .insert()) */ + iterator find(Key const & x) { + RbNode * node = RbUtil::find(this->root_, x); + + if (node) { + return iterator(detail::ID_Forward, detail::IL_Regular, node); + } else { + return this->end(); + } + } /*find*/ + + /* find node in tree with largest key k such that: + * k <= x, if is_closed + * k < x, if !is_closed + * + * return iterator to that node. + * + * If no such node exists, return the same value as this->cprebegin(); + * + * This satisfies continuity property: + * if: ix = find_glb(k, is_closed), + * then: ix+1 = find_lub(k, !is_closed) + * + * even when ix.is_dereferenceable() is false + */ + const_iterator find_glb(Key const & k, bool is_closed) const { + RbNode * node = RbUtil::find_glb(this->root_, k, is_closed); + + if (node) { + return const_iterator(detail::ID_Forward, + detail::IL_Regular, + node); + } else { + return this->cprebegin(); + } + } /*find_glb*/ + + const_iterator find_lub(Key const & k, bool is_closed) const { + const_iterator ix = this->find_glb(k, !is_closed); + return ++ix; + } /*find_lub*/ + + /* RbTreeConstLhs provides rvalue-substitute for lookup-only in const RedBlackTree + * instances + */ + RbTreeConstLhs operator[](Key const & k) const + { + RbNode const * node = RbUtil::find(this->root_, k); + + return RbTreeConstLhs(this, node); + } /*operator[]*/ + + /* RbTreeLhs defers assignment, so that rbtree can update values of + * Node::reduce along path from root to Node n with n.key = k + * + * + * Note: + * 1. return value remains valid across subsequent inserts and assignments, + * so this is legal: + * RbTree rbtree = ...; + * auto v = rbtree[key1]; + * + * rbtree[key2] = ...; + * rbtree.insert(key3, value3); + * + * v = ...; + * + * 2. 
return value is not valid across removes, even of distinct keys, + * so this is ILLEGAL: + * RbTree rbtree = ...; + * auto v = rbtree[key1]; + * + * assert(key1 != key2); + * + * rbtree.remove(key2); + * + * v = ...; // undefined behavior, + * // v.node contents may have been copied and v.node deleted + */ + RbTreeLhs operator[](Key const & k) { + std::pair insert_result + = RbUtil::insert_aux(value_type(k, Value() /*used iff creating new node*/), + false /*allow_replace_flag*/, + this->reduce_fn_, + &(this->root_)); + + return RbTreeLhs(this, insert_result.second, k); + } /*operator[]*/ + + /* compute value of reduce applied to the set K of all keys k[j] in subtree + * N with: + * - k[j] <= lub_key if is_closed = true + * - k[j] < lub_key if is_closed = false + * return reduce_fn.nil() if K is empty + */ + ReducedValue reduce_lub(Key const &lub_key, bool is_closed) const { + return RbUtil::reduce_lub(lub_key, + this->reduce_fn_, + is_closed, + this->root_); + } /*reduce_lub*/ + + /* Provided Reduce computes sum, and we call this rbtree f + * with keys k[i] and values v[i]: + * + * returns iterator pointing to i'th key-value pair {k[i],v[i]} in this tree, + * with reduced value r(i) (i.e. RbNode::reduced1); + * where r(i) is the result of reducing all values v[j] with j<=i + * + * editor bait: invert_integral + */ + const_iterator cfind_sum_glb(ReducedValue const & y) const { + using xo::tostr; + using xo::xtag; + + //char const * c_self = "RedBlackTree::find_sum_glb"; + + RbNode * N = RbUtil::find_sum_glb(this->reduce_fn_, + this->root_, + y); + + if(!N) { + /* for no-lower-bound edge cases, return iterator ix + * pointing to 'before the beginning' of this tree. 
+ * + * will have + * ix.is_dereferenceable() == false + * (bool)ix == false + */ + return const_iterator(detail::ID_Forward, + detail::IL_BeforeBegin, + RbUtil::find_leftmost(this->root_)); + } + + return const_iterator(detail::ID_Forward, + detail::IL_Regular, + N); + } /*cfind_sum_glb*/ + + const_iterator find_sum_glb(ReducedValue const & y) const { + return this->cfind_sum_glb(y); + } /*find_sum_glb*/ + + /* non-const version of .cfind_sum_glb() */ + iterator find_sum_glb(ReducedValue const & y) { + const_iterator ix = this->cfind_sum_glb(y); + + return iterator(ix.location(), + const_cast(ix.node())); + } /*find_sum_glb*/ + + void clear() { + auto visitor_fn = [](RbNode const * x, uint32_t /*d*/) { + /* RbUtil.postorder_node_visitor() isn't expecting us to + * alter node, but will not examine it after it's deleted + */ + RbNode * xx = const_cast(x); + + delete xx; + }; + + RbUtil::postorder_node_visitor(this->root_, + 0 /*depth -- ignored by lambda*/, + visitor_fn); + + this->size_ = 0; + this->root_ = nullptr; + } /*clear*/ + + std::pair + insert(std::pair const & kv_pair) { + std::pair insert_result + = RbUtil::insert_aux(kv_pair, + true /*allow_replace_flag*/, + this->reduce_fn_, + &(this->root_)); + + if (insert_result.first) + ++(this->size_); + + return (std::pair + (iterator(detail::ID_Forward, + detail::IL_Regular, + insert_result.second), + insert_result.first)); + } /*insert*/ + + std::pair + insert(std::pair && kv_pair) { + using xo::scope; + using xo::xtag; + + constexpr bool c_logging_enabled = false; + scope log(XO_DEBUG(c_logging_enabled)); + + std::pair insert_result + = RbUtil::insert_aux(std::move(kv_pair), + true /*allow_replace_flag*/, + this->reduce_fn_, + &(this->root_)); + + if (insert_result.first) + ++(this->size_); + + return (std::pair + (iterator(detail::ID_Forward, + detail::IL_Regular, + insert_result.second), + insert_result.first)); + } /*insert*/ + + bool erase(Key const & k) { + bool retval = RbUtil::erase_aux(k, + 
this->reduce_fn_, + &(this->root_)); + + if (retval) + --(this->size_); + + return retval; + } /*erase*/ + + /* verify class invariants. + * unless implementation is broken, or client manages + * to violate api rules, this will always return true. + * + * RB0. if root node is nil then .size is 0 + * RB1. if root node is non-nil, then root->parent() is nil, + * and .size = root->size + * RB2. if N = P->child(d), then N->parent()=P + * RB3. all paths to leaves have the same black height + * RB4. no red node has a red parent + * RB5. inorder traversal visits keys in monotonically increasing order + * RB6. Node::size reports the size of the subtree reachable from that node + * via child pointers + * RB7. Node::reduced reports the value of + * f(f(L, Node::value), R) + * where: L is reduced-value for left child, + * R is reduced-value for right child + * RB8. RedBlackTree.size() equals the #of nodes in tree + */ + bool verify_ok(bool /*throw_flag_not_implemented*/ = true) const { + using xo::scope; + using xo::tostr; + using xo::xtag; + + constexpr const char *c_self = "RedBlackTree::verify_ok"; + constexpr bool c_logging_enabled = false; + + scope log(XO_DEBUG(c_logging_enabled)); + + /* RB0. */ + if (root_ == nullptr) { + XO_EXPECT(size_ == 0, tostr(c_self, ": expect .size=0 with null root", + xtag("size", size_))); + } + + /* RB1. */ + if (root_ != nullptr) { + XO_EXPECT(root_->parent_ == nullptr, + tostr(c_self, ": expect root->parent=nullptr", + xtag("parent", root_->parent_))); + XO_EXPECT(root_->size_ == this->size_, + tostr(c_self, ": expect self.size=root.size", + xtag("self.size", size_), + xtag("root.size", root_->size_))); + } + + /* height (counting only black nodes) of tree */ + int32_t black_height = 0; + + /* n_node: #of nodes in this->root_ */ + size_t n_node = RbUtil::verify_subtree_ok(this->reduce_fn_, + this->root_, + &black_height); + + /* RB8. 
RedBlackTree.size() equals #of nodes in tree */ + XO_EXPECT(n_node == this->size_, + tostr(c_self, ": expect self.size={#of nodes n in tree}", + xtag("self.size", size_), + xtag("n", n_node))); + + if (c_logging_enabled) + log && log(xtag("size", this->size_), + xtag("blackheight", black_height)); + + return true; + } /*verify_ok*/ + + void display() const { RbUtil::display(this->root_, 0); } /*display*/ + + private: + /* #of key/value pairs in this tree */ + size_t size_ = 0; + /* root of red/black tree */ + RbNode * root_ = nullptr; + /* .reduce_fn :: (Accumulator x Key) -> Accumulator */ + Reduce reduce_fn_; + }; /*RedBlackTree*/ + + template + inline std::ostream & + operator<<(std::ostream &os, + RedBlackTree const &tree) + { + tree.display(); + return os; + } /*operator<<*/ + + template + inline std::ostream & + operator<<(std::ostream & os, + detail::IteratorBase const & iter) + { + iter.print(os); + return os; + } /*operator<<*/ + + } /*namespace tree*/ +} /*namespace xo*/ + +/* end RedBlackTree.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/BplusTreeUtil.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/BplusTreeUtil.hpp new file mode 100644 index 00000000..61c91e95 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/BplusTreeUtil.hpp @@ -0,0 +1,280 @@ +/* @file BplusTreeUtil.hpp */ + +#pragma once + +#include "IteratorUtil.hpp" +#include "bplustree_tags.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/tag.hpp" +#include // for std::unique_ptr +#include + +namespace xo { + namespace tree { + /* forward decl (see GenericNode.hpp) */ + template + class GenericNode; + /* forward decl (see InternalNode.hpp) */ + template + class InternalNode; + + namespace detail { + /* forward decl (see Iterator.hpp) */ + template + class ConstIterator; + } + + // ----- NodeType ----- + + enum class NodeType { internal, leaf }; + + inline std::string node_type2str(NodeType x) { + switch(x) { + case 
NodeType::internal: return "internal"; + case NodeType::leaf: return "leaf"; + } + + return "???"; + } /*node_type2str*/ + + inline std::ostream & operator<<(std::ostream & os, NodeType x) { + os << node_type2str(x); + return os; + } /*operator<<*/ + + /* see bplustree/LeafNode.hpp */ + template + struct LeafNode; + + /* see bplustree/InternalNode.hpp */ + template + struct InternalNode; + + // ----- NodeItem + NodeItemPlaceholder ----- + + template + struct NodeItem {}; + + /* struct with same size as NodeItem, but POD + with trivial ctor/dtor */ + template + struct NodeItemPlaceholder { + std::uint8_t mem_v_[sizeof(NodeItem)]; + }; /*NodeItemPlaceholder*/ + + // ----- FindResult ----- + + /* report a node, along with its location (0-based index) within parent. + * use nullptr for .node if item/node not found + * use 0 for .ix if node is root (i.e. has no parent) + * + * expect ConcreteNodeType = LeafNode<..> | InternalNode<..> + */ + template + struct FindNodeResult { + public: + FindNodeResult() = default; + FindNodeResult(FindNodeResult const & x) = default; + FindNodeResult(std::size_t ix, ConcreteNodeType * node) : ix_{ix}, node_{node} {} + + std::size_t ix() const { return ix_; } + ConcreteNodeType * node() const { return node_; } + + private: + /* 0-based index within parent */ + std::size_t ix_ = 0; + /* a B+ tree node */ + ConcreteNodeType * node_ = nullptr; + }; /*FindNodeResult*/ + + template + struct BplusTreeUtil { + public: + using GenericNodeType = GenericNode; + using InternalNodeType = InternalNode; + using LeafNodeType = LeafNode; + using const_iterator = detail::ConstIterator; + + static std::size_t get_node_size(GenericNodeType const * node) { + return node->size(); + } + + /* only implemented for OrdinalTag = ordinal_enabled */ + static void print_node_size(std::ostream & os, GenericNodeType const * node) { + using xo::xtag; + + os << (node ? 
node->size() : 0UL); + } + + static const_iterator find_ith(GenericNodeType * generic_node, + std::size_t i_tree, + const_iterator cend) { + using xo::xtag; + + if (!generic_node) + return cend; + + std::size_t iter = 0; + + /* 100-level B+ tree won't fit in memory -- would have at least 2^100 nodes! */ + while (iter < 100) { + switch (generic_node->node_type()) { + case NodeType::leaf: + return const_iterator(detail::ID_Forward /*dirn*/, + detail::IL_Regular /*loc*/, + reinterpret_cast(generic_node), + i_tree /*item_ix*/); + case NodeType::internal: + { + /* scan for ith member (counting from 0) */ + + InternalNodeType const * internal_node + = reinterpret_cast(generic_node); + + std::size_t sum_z = 0; + std::size_t z = 0; + + std::size_t i = 0; + std::size_t n = internal_node->n_elt(); + + for (; ilookup_elt(i).child(); + + z = child_node->size(); + + if (i_tree < sum_z + z) { + /* continue search in i'th child of internal_node; + * accounting for the sum_z members in nodes to the left of i_child + */ + generic_node = child_node; + i_tree = i_tree - sum_z; + break; + } + + sum_z += z; + } + + if (i == n) { + throw std::runtime_error(tostr("BplusTree::find_ith: internal index failure", + xtag("i_tree", i_tree), + xtag("last_z", z), + xtag("n", internal_node->n_elt()), + xtag("sum_z", sum_z))); + } + } + break; + } /*switch*/ + + ++iter; + } /*loop over descending internal node path*/ + + throw std::runtime_error(tostr("BplusTree::find_ith: internal loop failure", + xtag("iter", iter))); + + /* impossible! 
*/ + return cend; + } /*find_ith*/ + + static void node_clear_size(InternalNodeType * node) { + node->clear_size(); + } + + static void node_add_size(InternalNodeType * node, std::size_t incr_z) { + node->add_size(incr_z); + } + + static void node_sub_size(InternalNodeType * node, std::size_t decr_z) { + node->sub_size(decr_z); + } + + static void post_modify_add_ancestor_size(InternalNodeType * node, std::size_t incr_z, bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag)); + + while (node) { + log && log(xtag("node", node), + xtag("old_z", node->size()), + xtag("incr_z", incr_z)); + + node->add_size(incr_z); + + node = node->parent(); + } + } /*post_modify_add_ancestor_size*/ + + static void post_modify_sub_ancestor_size(InternalNodeType * node, std::size_t decr_z, bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag)); + + while (node) { + log && log(xtag("node", node), + xtag("old_z", node->size()), + xtag("decr_z", decr_z)); + + node->sub_size(decr_z); + + node = node->parent(); + } + } /*post_modify_sub_ancestor_size*/ + }; + + template + struct BplusTreeUtil { + public: + using GenericNodeType = GenericNode; + using InternalNodeType = InternalNode; + using LeafNodeType = LeafNode; + using const_iterator = detail::ConstIterator; + + static std::size_t get_node_size(GenericNodeType const * node) { return 0; } + + static void print_node_size(std::ostream & os, GenericNodeType const * node) { + os << "n/a"; + } + + /* find_ith not implemented without ordinal feature */ + static const_iterator find_ith(GenericNodeType * generic_node, + std::size_t i_tree, + const_iterator cend) { + throw std::runtime_error("BplusTreeUtil::find_ith: not implemented (requires tags::ordinal_enabled)"); + } + + /* per-node size not implemented, so these are no-ops */ + static void node_clear_size(InternalNodeType * node) {} + static void node_add_size(InternalNodeType * node, std::size_t incr_z) {} + static 
void node_sub_size(InternalNodeType * node, std::size_t decr_z) {} + static void post_modify_add_ancestor_size(InternalNodeType * node, std::size_t incr_z, bool debug_flag) {} + static void post_modify_sub_ancestor_size(InternalNodeType * node, std::size_t decr_z, bool debug_flag) {} + }; + } /*namespace tree*/ +} /*namespace xo*/ + +namespace logutil { + template + struct nodesize { + explicit nodesize(Node const * x) : node_{x} {} + + Node const * node() const { return node_; } + + private: + Node const * node_ = nullptr; + }; /*nodesize*/ + + template + inline std::ostream & operator<<(std::ostream & os, + nodesize const & x) { + xo::tree::BplusTreeUtil::print_node_size(os, x.node()); + return os; + }; +} /*namespace logutil*/ + +/* end BplusTreeUtil.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/GenericNode.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/GenericNode.hpp new file mode 100644 index 00000000..aa760c45 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/GenericNode.hpp @@ -0,0 +1,123 @@ +/* @file GenericNode.hpp */ + +#pragma once + +#include "BplusTreeUtil.hpp" +#include "bplustree_tags.hpp" +#include // for std::unique_ptr +#include + +namespace xo { + namespace tree { + /* shim so we can partially specialize */ + template + struct GenericNodeBase { + }; /*GenericNodeBase*/ + + template + struct GenericNodeBase { + /* #of items (key-value pairs) in this subtree */ + virtual std::size_t size() const = 0; + }; /*GenericNodeShim*/ + + // ----- GenericNode ----- + // + // base class for LeafNode, InternalNode + + template + class GenericNode : public GenericNodeBase { + public: + using PropertiesType = Properties; + using InternalNodeType = InternalNode; + using LeafNodeType = LeafNode; + + public: + explicit GenericNode(NodeType ntype, std::size_t branching_factor) + : node_type_{ntype}, branching_factor_{branching_factor} {} + virtual ~GenericNode() = default; + + NodeType node_type() const { return 
node_type_; } + InternalNodeType * parent() const { return parent_; } + std::size_t n_elt() const { return n_elt_; } + std::size_t branching_factor() const { return branching_factor_; } + + void set_parent(InternalNodeType * x) { this->parent_ = x; } + +#ifdef OBSOLETE + /* #of items (key-value pairs) in this subtree */ + virtual std::size_t size() const = 0; +#endif + + virtual Key const & glb_key() const = 0; + /* support methods for BplusTree::verify() + * with_lub. true to use lub_key; false to ignore + * lub_key. if with_lub=true, strict least upper bound key (in B+ tree) for this subtree; + * all keys in this subtree must be strictly less than lub_key. + * ignored when with_lub=false + * lh_leaf. if null, this subtree contains the smallest key in ancestor B+ tree; + * if non-null, lh_leaf's rightmost key is immediate predecessor + * of leftmost key in this subtree + * rh_leaf. if null, this subtree contains the largest key in ancestor B+ tree; + * if non-null, rh_leaf's leftmost key is immediate successor + * of rightmost key in this subtree + */ + virtual std::size_t verify_helper(InternalNodeType const * parent, + bool with_lub, + Key const & lub_key, + LeafNodeType const * lh_leaf, + LeafNodeType const * rh_leaf) const = 0; + virtual void verify_glb_key(Key const & key) const = 0; + FindNodeResult c_find_min_leaf_node() const; + FindNodeResult c_find_max_leaf_node() const; + + virtual FindNodeResult find_min_leaf_node() = 0; + virtual FindNodeResult find_max_leaf_node() = 0; + + /* notification just before permanently removing this node from B+ tree */ + virtual void notify_remove() {} + + private: + /* NodeType::internal | NodeType::leaf */ + NodeType node_type_; + /* pointer to parent node + * invariant: parent has direct pointer to this node, + * except briefly during construction + */ + InternalNodeType * parent_ = nullptr; + + protected: + /* #of non-empty elements (children) of this node + * + * invariant: + * - .elt_v[i].child.ptr is non-null for 0 
<= i < .n_elt + * - for (0 < i < .n_elt): + * .elt_v[i-1].key < .elt_v[i].key + * - elt_v[i].key not defined for (i >= .n_elt) + */ + std::size_t n_elt_ = 0; + /* need to store actual branching factor, for LeafNode/InternalNode dtors */ + std::size_t branching_factor_ = 0; + }; /*GenericNode*/ + + /* const version (non-const version below) */ + template + FindNodeResult const> + GenericNode::c_find_min_leaf_node() const { + InternalNode * self = const_cast *>(this); + + return self->find_min_leaf_node(); + } /*c_find_min_leaf_node*/ + + /* const version (non-const version below) */ + template + FindNodeResult const> + GenericNode::c_find_max_leaf_node() const { + InternalNode * self = const_cast *>(this); + + return self->find_max_leaf_node(); + } /*c_find_max_leaf_node*/ + + } /*namespace tree*/ +} /*namespace xo*/ + +/* end GenericNode.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/InternalNode.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/InternalNode.hpp new file mode 100644 index 00000000..9ef4b7dd --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/InternalNode.hpp @@ -0,0 +1,768 @@ +/* @file InternalNode.hpp */ + +#pragma once + +#include "GenericNode.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/tostr.hpp" +#include + +namespace xo { + namespace tree { + // ----- InternalNodeItem ------ + + /* see also: NodeItem */ + template + struct NodeItem { + using GenericNodeType = GenericNode; + + public: + NodeItem() = default; + explicit NodeItem(std::unique_ptr child) + : child_{std::move(child)} { + if (child_) + this->key_ = child_->glb_key(); + } + + Key const & key() const { return key_; } + GenericNodeType * child() const { return child_.get(); } + + std::unique_ptr release_child() { return std::move(child_); } + + void set_key(Key key) { key_ = std::move(key); } + + void notify_remove() { + if (child_) + child_->notify_remove(); + } /*notify_remove*/ + + private: + /* invariant: .key is 
leftmost key in subtree rooted at .child + * (i.e. greatest lower bound for keys in that subtree) + */ + Key key_; + /* subtree. subtree has minimum key value .key */ + std::unique_ptr child_; + }; /*NodeItem */ + + template + using InternalNodeItem = NodeItem; + + /* struct with same size as InternalNodeItem, but POD + with no ctor/dtor */ + template + using InternalNodeItemPlaceholder = NodeItemPlaceholder; + + /* default implements tags::ordinal_disabled; see partial specialization below for ordinal_enabled */ + template + struct InternalNodeShim : public GenericNode { + public: + using GenericNodeType = GenericNode; + + public: + InternalNodeShim(NodeType ntype, std::size_t branching_factor) : GenericNode{ntype, branching_factor} {} + + protected: + /* not implemented with tags::ordinal_disabled */ + void assign_size(std::size_t z) {} + }; + + template + struct InternalNodeShim : public GenericNode { + public: + using GenericNodeType = GenericNode; + + public: + InternalNodeShim(NodeType ntype, std::size_t branching_factor) : GenericNode{ntype, branching_factor} {} + + void clear_size() { this->size_ = 0; } + void add_size(std::size_t z) { this->size_ += z; } + void sub_size(std::size_t z) { this->size_ -= z; } + + virtual std::size_t size() const override { return size_; } + + protected: + void assign_size(std::size_t z) { this->size_ = z; } + + protected: + std::size_t size_ = 0; + }; /*InternalNodeShim*/ + + /* require: + * - Properties.branching_factor() + */ + template + struct InternalNode : public InternalNodeShim { + public: + using GenericNodeType = GenericNode; + using InternalNodeType = InternalNode; + using LeafNodeType = LeafNode; + using InternalNodeItemPlaceholderType = InternalNodeItemPlaceholder; + using InternalNodeItemType = InternalNodeItem; + + public: + virtual ~InternalNode(); + + /* node size in bytes (increases with branching factor) */ + static std::size_t node_sizeof(std::size_t branching_factor); + + /* use when splitting root node 
for the first time; + * new root node will be leaf->internal. + * + * require: child_1, child_2 are non-empty + */ + static std::unique_ptr make_2(std::unique_ptr child_1, + std::unique_ptr child_2); + + /* Before: + * + * m = mid_ix + * n = src.n_elt - 1 + * xa @ [m-1] + * xb @ [m] + * xz @ [n-1] + * + * src.elt_v[] + * + * 0 m-1 m n-1 + * +----+-...-+----+----+-...-+----+ + * | x0 | ... | xa | xb | ... | xz | + * +----+-...-+----+----+-...-+----+ + * + * <----------- n items -----------> + * + * After: + * + * src.elt_v[] new_node.elt_v[] + * + * n-m-1 + * 0 m-1 0 v + * +----+-...-+----+ +----+-...-+----+ + * | x0 | ... | xa | | xb | | xz | + * +----+-...-+----+ +----+-...-+----+ + * + * <--- m items ---> <-- n-m items --> + */ + static std::unique_ptr annex(std::size_t mid_ix, + InternalNode * src); + + /* .elt_v[] + * + * 0 k n-1 with: n <= b = branching factor + * +---+---+- ... -+---+- ... -+---+---+ k = lub(key) in {e1..en} + * | e1| e2| | ek| | | en| + * +---+---+- ... -+---+- ... -+---+---+ + * + * retval.first: true if key already present in tree. implies lub_ix_recd.second >= 1 + * retval.second: upper bound (strict) index position in .elt_v[] of key + * + * Cost: O(log(bf)) key comparisons + */ + std::size_t find_lub_ix(Key const & key) const; + + /* warning: requires key is present! */ + std::size_t find_ix(Key const & key) const { return this->find_lub_ix(key) - 1; } + + /* O(bf), but does not rely on key invariants. 
*/ + std::size_t locate_child_by_address(GenericNodeType const * target_child) const; + + InternalNodeItemType & lookup_elt(std::size_t i) { return *(reinterpret_cast(&(elt_v_[i]))); } + + InternalNodeItemType const & lookup_elt(std::size_t i) const { return *(reinterpret_cast(&(elt_v_[i]))); } + + FindNodeResult find_child(Key const & key); + + /* insert node at position ix; moving items starting in .elt_v[ix] one slot to the right */ + void insert_node(std::size_t ix, std::unique_ptr child, bool debug_flag); + + /* remove node at position ix; moving items starting .elt_v[ix+1] one slot to the left; + * if target is a leaf node, also remove from prev_leafnode/next_leafnode list + */ + void remove_node(std::size_t ix, bool debug_flag); + + /* redistribute last n items from left-hand sibling lh to this internal node */ + void prepend_from_lh_sibling(InternalNode * lh, std::size_t n, bool debug_flag); + + /* redistribute first n items from right-hand sibling rh to this internal node */ + void append_from_rh_sibling(std::size_t n, InternalNode * rh); + + void append_rh_sibling(InternalNode * rh) { this->append_from_rh_sibling(rh->n_elt(), rh); } + + /* returns new node with upper half of original element vector (i.e. 
of this.elt_v[]); + * original updated to retain lower half + */ + std::unique_ptr split_internal(); + + void set_glb_key(Key key) { this->lookup_elt(0).set_key(key); } + + /* memory for InternalNode instances is always created using new[], + * so required to use delete[] to deallocate + */ + void operator delete (void * mem) noexcept { ::operator delete[](mem); } + + // ----- inherited from GenericNode ----- + + virtual Key const & glb_key() const override { return this->lookup_elt(0).key(); } + + virtual std::size_t verify_helper(InternalNode const * parent, + bool with_lub_flag, + Key const & lub_key, + LeafNodeType const * lh_leaf, + LeafNodeType const * rh_leaf) const override; + + virtual void verify_glb_key(Key const & key) const override; + + /* find in subtree_arg the leftmost leaf node (i.e. leaf node with smallest key) */ + virtual FindNodeResult find_min_leaf_node() override; + /* find in subtree_arg the rightmost leaf node (i.e. leaf node with largest key) */ + virtual FindNodeResult find_max_leaf_node() override; + + private: + explicit InternalNode(std::size_t branching_factor); + + private: +#ifdef OBSOLETE + /* total #of elements in this subtree */ + std::size_t size_ = 0; +#endif + /* flexible array; actual size will be .branching_factor(). + * + * .elt_v[i] is created/destroyed as an InternalNodeItemType with non-trivial ctor/dtor. + * we must declare member using POD placeholder to satisfy flexible array rules + * + * invariant: + * - with branching factor b, so range for .elt_v[] is 0 .. b-1: + * - .elt_v[j].child.ptr is null -> {.elt_v[j+1].child.ptr .. 
.elt_v[b-1].child.ptr} are also null + */ + InternalNodeItemPlaceholderType elt_v_[]; + }; /*InternalNode*/ + + template + InternalNode::~InternalNode() { + /* since we're using flexible array for .elt_v[], need to manually run destructors */ + for (std::size_t i=0, n=this->branching_factor_; ilookup_elt(i).~InternalNodeItemType(); + } + + /* hygiene */ + BplusTreeUtil::node_clear_size(this); + this->n_elt_ = 0; + this->branching_factor_ = 0; + } /*dtor*/ + + template + std::size_t + InternalNode::node_sizeof(std::size_t branching_factor) { + return (sizeof(InternalNode) + + (branching_factor + * sizeof(InternalNodeItemType))); + } /*node_sizeof*/ + + template + std::unique_ptr> + InternalNode::make_2(std::unique_ptr child_1, + std::unique_ptr child_2) { + std::size_t branching_factor = child_1->branching_factor(); + + std::size_t mem_z = node_sizeof(branching_factor); + std::uint8_t * mem = new std::uint8_t[mem_z]; + + assert(child_1->n_elt() > 0); + assert(child_2->n_elt() > 0); + + std::unique_ptr retval(new (mem) InternalNode(branching_factor)); + + child_1->set_parent(retval.get()); + child_2->set_parent(retval.get()); + + retval->assign_size(BplusTreeUtil::get_node_size(child_1.get()) + + BplusTreeUtil::get_node_size(child_2.get())); + retval->n_elt_ = 2; + + retval->lookup_elt(0) = std::move(InternalNodeItemType(std::move(child_1))); + retval->lookup_elt(1) = std::move(InternalNodeItemType(std::move(child_2))); + + return retval; + } /*make_2*/ + + template + std::unique_ptr> + InternalNode::annex(std::size_t mid_ix, + InternalNode * src) + { + std::size_t branching_factor = src->branching_factor(); + + std::size_t mem_z = node_sizeof(branching_factor); + std::uint8_t * mem = new std::uint8_t[mem_z]; + + std::unique_ptr new_node(new (mem) InternalNode(branching_factor)); + + std::size_t hi_ix = src->n_elt(); + + new_node->n_elt_ = hi_ix - mid_ix; + + std::size_t annex_z = 0; + + /* annexing upper-half of *src into new_node */ + for (std::size_t i = 0, n = 
hi_ix - mid_ix; i < n; ++i) { + InternalNodeItemType & src_slot = src->lookup_elt(mid_ix + i); + InternalNodeItemType & new_slot = new_node->lookup_elt(i); + + annex_z += BplusTreeUtil::get_node_size(src_slot.child()); + + new_slot = std::move(src->lookup_elt(mid_ix + i)); + new_slot.child()->set_parent(new_node.get()); + } + + new_node->assign_size(annex_z); + + /* ordinal_disabled: noop + * ordinal_enabled: bookkeeping for src.size (+ new_node.size, see above) + */ + src->assign_size(BplusTreeUtil::get_node_size(src) - annex_z); + src->n_elt_ = mid_ix; + + return new_node; + } /*annex*/ + + template + std::size_t + InternalNode::find_lub_ix(Key const & key) const { + if (key < this->lookup_elt(0).key()) + return 0; + + std::size_t lo = 0; + std::size_t hi = this->n_elt_; + + while (lo + 1 < hi) { + std::size_t mid = lo + (hi - lo) / 2; + + if (key < this->lookup_elt(mid).key()) + hi = mid; + else + lo = mid; + } + + return hi; + } /*find_lub_ix*/ + + template + std::size_t + InternalNode::locate_child_by_address(GenericNodeType const * target_child) const { + for (std::size_t ix = 0; ix < this->n_elt_; ++ix) { + if (this->lookup_elt(ix).child() == target_child) + return ix; + } + + return static_cast(-1); + } /*locate_child_by_address*/ + + template + FindNodeResult> + InternalNode::find_min_leaf_node() { + FindNodeResult findresult(0, this); + + while (findresult.node() && (findresult.node()->node_type() == NodeType::internal)) { + std::size_t min_ix = 0; + + findresult = FindNodeResult(min_ix, + (reinterpret_cast(findresult.node())) + ->lookup_elt(min_ix /*leftmost child*/).child()); + } + + /* findresult.node()->node_type() == NodeType::leaf (if non-null) */ + + if (!findresult.node()) { + assert(false); + return FindNodeResult(); + } + + assert(findresult.node()->node_type() == NodeType::leaf); + + return FindNodeResult(findresult.ix(), + reinterpret_cast(findresult.node())); + } /*find_min_leaf_node*/ + + template + FindNodeResult> + 
InternalNode::find_max_leaf_node() { + FindNodeResult findresult(0, this); + + while (findresult.node() && (findresult.node()->node_type() == NodeType::internal)) { + std::size_t max_ix = findresult.node()->n_elt() - 1; + + findresult = FindNodeResult + (max_ix, + (reinterpret_cast(findresult.node())) + ->lookup_elt(max_ix /*rightmost child*/).child()); + } + + /* findresult.node()->node_type() == NodeType::leaf (if non-null) */ + + if (!findresult.node()) { + assert(false); + return FindNodeResult(); + } + + assert(findresult.node()->node_type() == NodeType::leaf); + + return FindNodeResult(findresult.ix(), + reinterpret_cast(findresult.node())); + } /*find_max_leaf_node*/ + + template + FindNodeResult> + InternalNode::find_child(Key const & key) { + std::size_t lub_ix = this->find_lub_ix(key); + + if (lub_ix > 0) + --lub_ix; + + return FindNodeResult(lub_ix, this->lookup_elt(lub_ix).child()); + } /*find_child*/ + + template + void + InternalNode::insert_node(std::size_t ix, std::unique_ptr child, bool debug_flag) + { + using xo::scope; + using xo::tostr; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("self", this), + xtag("n_elt", this->n_elt()), + xtag("bf", this->branching_factor()), + xtag("ix", ix), + xtag("child", child.get())); + + if (this->n_elt_ >= this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("InternalNode::insert_node: node already full", + xtag("node.n_elt", this->n_elt()), + xtag("branching_factor", this->branching_factor()))); + } + + if (ix > this->n_elt_) { + assert(false); + throw std::runtime_error(tostr("InternalNode::insert_node: insert position out of range", + xtag("ix", ix), + xtag("node.n_elt", this->n_elt()), + xtag("bf", this->branching_factor()))); + } + + std::size_t pos_ix = this->n_elt_; + + while (pos_ix > ix) { + this->lookup_elt(pos_ix) = std::move(this->lookup_elt(pos_ix - 1)); + --pos_ix; + } + + /* WARNING: don't update .size here + * in practice we use .insert_node() when 
introducing a single new key/value pair; + * when we use .insert_node() we split an existing node, + * and actually just want to increment .size. + * + * We leave this to caller (e.g. BplusTree.internal_insert_aux()) + * because in that context can see the upstream split + */ + // this->size_ += child->n_elt(); + + ++(this->n_elt_); + child->set_parent(this); + this->lookup_elt(ix) = InternalNodeItemType(std::move(child)); + } /*insert_node*/ + + template + void + InternalNode::remove_node(std::size_t ix, bool debug_flag) { + using xo::scope; + using xo::tostr; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("self", this), + xtag("n_elt", this->n_elt()), + xtag("bf", this->branching_factor()), + xtag("ix", ix)); + + if (ix >= this->n_elt_) { + assert(false); + throw std::runtime_error(tostr("InternalNode::remove_node: target position out of range", + xtag("ix", ix), + xtag("node.n_elt", this->n_elt()), + xtag("bf", this->branching_factor()))); + } + + std::size_t pos_ix = ix; + std::size_t end_ix = this->n_elt_ - 1; + + { + InternalNodeItemType & target_item = this->lookup_elt(pos_ix); + + /* WARNING: don't update .size here + * in practice we use .remove_node() when deleting a single new key/value pair; + * when we use .remove_node() we merge existing nodes, + * and actually just want to decrement .size. + * + * We leave this to caller (e.g. 
BplusTree.internal_remove_aux()) + * because in that context can see the upstream merge + */ + //this->size_ -= target_item.child()->size(); + target_item.notify_remove(); + } + + while (pos_ix < end_ix) { + //scope x1("loop", debug_flag); + //x1(xtag("pos_ix", pos_ix)); + + this->lookup_elt(pos_ix) = std::move(this->lookup_elt(pos_ix + 1)); + ++pos_ix; + } + + --(this->n_elt_); + } /*remove_node*/ + + template + void + InternalNode::prepend_from_lh_sibling(InternalNode * lh, std::size_t n, bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("@", this), xtag("n", n)); + + if (this->n_elt() + n > this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("InternalNode.prepend_from_lh_sibling: expected combined #elt <= bf", + xtag("self.n_elt", this->n_elt()), + xtag("n", n), + xtag("bf", this->branching_factor()))); + } + + std::size_t n_lh = lh->n_elt(); + std::size_t n_rh = this->n_elt(); + + /* move elts in *this to the right n steps (starting from the end) */ + for (std::size_t ixp1 = this->n_elt(); ixp1 > 0; --ixp1) { + std::size_t ix = ixp1 - 1; + //x.log("move", xtag("ix", ix), xtag("ix+n", ix+n)); + this->lookup_elt(ix + n) = std::move(this->lookup_elt(ix)); + } + + std::size_t xfer_z = 0; + + /* xfer n elts from upper end of lh, to lower end of *this */ + for (std::size_t ix = 0; ix < n; ++ix) { + //x.log("fill", xtag("ix", ix), xtag("n_lh-n+ix", n_lh - n + ix)); + + InternalNodeItemType & lh_sibling_item = lh->lookup_elt(n_lh - n + ix); + + xfer_z += BplusTreeUtil::get_node_size(lh_sibling_item.child()); + + this->lookup_elt(ix) = std::move(lh_sibling_item); + /* + fixup parent pointer */ + this->lookup_elt(ix).child()->set_parent(this); + } + + BplusTreeUtil::node_add_size(this, xfer_z); + BplusTreeUtil::node_sub_size(lh, xfer_z); + + this->n_elt_ += n; + lh->n_elt_ -= n; + + log && log(xtag("this.glb_key", this->glb_key()), + xtag("this[0].key", this->lookup_elt(0).key())); + + 
log.end_scope(); + } /*prepend_from_lh_sibling*/ + + template + void + InternalNode::append_from_rh_sibling(std::size_t n, InternalNode * rh) { + using xo::xtag; + + if (this->n_elt() + n > this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("InternalNode.append_from_rh_sibling: expected combined #elt <= bf", + xtag("self.n_elt", this->n_elt()), + xtag("n", n), + xtag("bf", this->branching_factor()))); + } + + std::size_t n_lh = this->n_elt(); + std::size_t xfer_z = 0; + + for (std::size_t ix = 0; ix < n; ++ix) { + InternalNodeItemType & rh_sibling_item = rh->lookup_elt(ix); + + xfer_z += BplusTreeUtil::get_node_size(rh_sibling_item.child()); + this->lookup_elt(n_lh + ix) = std::move(rh_sibling_item); + /* + fixup parent pointer */ + this->lookup_elt(n_lh + ix).child()->set_parent(this); + } + + BplusTreeUtil::node_add_size(this, xfer_z); + this->n_elt_ += n; + + /* shuffle remaining members of rh sibling n items to the left */ + for (std::size_t ix = 0; ix < rh->n_elt() - n; ++ix) { + rh->lookup_elt(ix) = std::move(rh->lookup_elt(ix + n)); + } + + BplusTreeUtil::node_sub_size(rh, xfer_z); + rh->n_elt_ -= n; + } /*append_from_rh_sibling*/ + + template + std::unique_ptr> + InternalNode::split_internal() { + std::size_t n_elt = this->n_elt_; + std::size_t mid_ix = n_elt / 2; + + return InternalNode::annex(mid_ix, this); + } /*split_internal*/ + + template + std::size_t + InternalNode::verify_helper(InternalNode const * parent, + bool with_lub_flag, + Key const & lub_key, + LeafNodeType const * lh_leaf, + LeafNodeType const * rh_leaf) const + { + using xo::tostr; + using xo::xtag; + + std::size_t retval = 0; + + /* verify immediate parent pointer is correct */ + if (this->parent() != parent) { + throw std::runtime_error(tostr("InternalNode::verify_helper" + ": expected parent pointer to refer to actual parent", + xtag("stored_parent", this->parent()), + xtag("actual_parent", parent))); + } + + std::size_t n = this->n_elt_; + + /* verify all 
children have same NodeType (either all= internal or all= leaf) */ + NodeType target_child_node_type = NodeType::leaf; + + if (n > 0) + target_child_node_type = this->lookup_elt(0).child()->node_type(); + + LeafNodeType const * prev_lh_leaf = lh_leaf; + + for (std::size_t i=0; i < n; ++i) { + /* check consistent node type */ + NodeType i_nodetype = this->lookup_elt(i).child()->node_type(); + + if ((i > 0) && (i_nodetype != target_child_node_type)) { + throw std::runtime_error(tostr("InternalNode::verify_helper" + ": expected all children to share the same node type", + xtag("i", i), + xtag("elt[0].node_type", target_child_node_type), + xtag("elt[i].node_type", i_nodetype))); + } + + /* nested verify on child subtrees */ + InternalNodeItemType const & i_elt = this->lookup_elt(i); + + LeafNodeType const * next_lh_leaf = ((i+1 < n) + ? this->lookup_elt(i+1).child()->find_min_leaf_node().node() + : rh_leaf); + + retval += i_elt.child()->verify_helper(this, + (i+1 < n) ? true : with_lub_flag, + (i+1 < n) ? 
this->lookup_elt(i+1).key() : lub_key, + prev_lh_leaf, + next_lh_leaf); + + prev_lh_leaf = i_elt.child()->find_max_leaf_node().node(); + } + + if (Properties::ordinal_tag_value() == tags::ordinal_enabled) { + /* verify stored subtree size is consistent with children's */ + std::size_t sum_z = 0; + + for (std::size_t i=0, n=this->n_elt_; i < n; ++i) { + InternalNodeItemType const & elt = this->lookup_elt(i); + + sum_z += BplusTreeUtil::get_node_size(elt.child()); + } + + std::size_t self_z = BplusTreeUtil::get_node_size(this); + + if (sum_z != self_z) { + throw std::runtime_error(tostr("InternalNode::verify_helper", + ": inconsistent subtree size", + xtag("node", this), + xtag("treez[stored]", self_z), + xtag("treez[computed]", sum_z))); + } + } + + /* verify stored glb_key is correct */ + for (std::size_t i=0, n=this->n_elt_; i < n; ++i) { + InternalNodeItemType const & elt = this->lookup_elt(i); + + elt.child()->verify_glb_key(elt.key()); + } + + /* verify locally stored keys appear in sorted order */ + for (std::size_t i=1; i < n; ++i) { + InternalNodeItemType const & prev = this->lookup_elt(i-1); + InternalNodeItemType const & elt = this->lookup_elt(i); + + if (prev.key() < elt.key()) { + ; + } else { + throw std::runtime_error(tostr("InternalNode::verify_helper" + ": expected local keys in strictly increasing order", + xtag("i", i), + xtag("key(i-1)", prev.key()), + xtag("key(i)", elt.key()))); + } + } + + /* verify highest stored key before parent-supplied upper bound */ + if (with_lub_flag) { + if (this->lookup_elt(n-1).key() < lub_key) { + ; + } else { + throw std::runtime_error(tostr("InternalNode::verify_helper" + ": expected highest local key before parent-supplied lub key", + xtag("n", n), + xtag("key(n-1)", this->lookup_elt(n-1).key()), + xtag("lub_key", lub_key))); + } + } + + return retval; + } /*verify_helper*/ + + template + void + InternalNode::verify_glb_key(Key const & key) const { + InternalNodeItemType const & elt = this->lookup_elt(0); + + 
elt.child()->verify_glb_key(key); + } /*verify_glb_key*/ + + template + InternalNode::InternalNode(std::size_t branching_factor) + : InternalNodeShim{NodeType::internal, branching_factor} + { + /* must invoke ctor explicitly for each .elt_v[i]. + * compiler doesn't know extent of .elt_v[], since it's a flexible array + */ + for (std::size_t i = 0; i < branching_factor; ++i) { + /* using placement new to force ctor call inside flexible array */ + new (&(this->lookup_elt(i))) InternalNodeItemType(); + } + } /*ctor*/ + + } /*namespace tree*/ +} /*namespace xo*/ + +/* end InternalNode.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/Iterator.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/Iterator.hpp new file mode 100644 index 00000000..ed82f8d8 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/Iterator.hpp @@ -0,0 +1,355 @@ +/* @file Iterator.hpp */ + +#pragma once + +#include "IteratorUtil.hpp" +#include "LeafNode.hpp" +#include "xo/indentlog/print/tostr.hpp" + +namespace xo { + namespace tree { + namespace detail { + /* TODO: move to tree/IteratorUtil.hpp */ + + /* placeholder - specialize on isConst */ + template + struct NodeTypeTraits { using LeafNodeType = void; }; + + /* non-const node pointer */ + template + struct NodeTypeTraits { + using NativeLeafNodeType = LeafNode; + using LeafNodeType = NativeLeafNodeType; + using NativeContentsType = typename LeafNodeType::ContentsType; + using LeafNodePtrType = LeafNodeType *; + }; + + /* const node pointer */ + template + struct NodeTypeTraits { + using NativeLeafNodeType = LeafNode; + using LeafNodeType = NativeLeafNodeType const; + using NativeContentsType = typename LeafNodeType::ContentsType const; + using LeafNodePtrType = LeafNodeType const *; + }; + + /* shared between const and non-const b+ tree iterators + * + * +------------+ + * |IteratorBase| + * | .dirn | + * | .location | + * | .leafnode | + * | .ix | + * +------------+ + * ^ + * | isa +-------------+ + * 
+-----------|ConstIterator| + * | | .operator++ | + * | | .operator-- | + * | +-------------+ + * | + * | isa +--------+ + * +-----------|Iterator| + * +--------+ + */ + template + class IteratorBase { + public: + using Traits = NodeTypeTraits; + using BpLeafNodePtrType = typename Traits::LeafNodePtrType; + using BpLeafNodeItemType = typename Traits::LeafNodeType::LeafNodeItemType; + using NativeContentsType = typename Traits::NativeContentsType; + + protected: + IteratorBase() = default; + IteratorBase(IteratorDirection dirn, IteratorLocation loc, BpLeafNodePtrType leaf, std::size_t ix) + : dirn_{dirn}, location_{loc}, leafnode_{leaf}, ix_{ix} {} + IteratorBase(IteratorBase const &) = default; + + static IteratorBase prebegin_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Forward, IL_BeforeBegin, node, 0 /*ix*/); + } + + static IteratorBase begin_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Forward, + (node ? IL_Regular : IL_AfterEnd), + node, + 0 /*ix*/); + } + + static IteratorBase end_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Forward, + IL_AfterEnd, + node, + 0 /*ix*/); + } + + static IteratorBase rprebegin_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Reverse, + IL_AfterEnd, + node, + 0 /*ix*/); + } + + static IteratorBase rbegin_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Reverse, + (node ? IL_Regular : IL_BeforeBegin), + node, + (node ? 
node->n_elt() - 1: 0)); + } + + static IteratorBase rend_aux(BpLeafNodePtrType node) { + return IteratorBase(ID_Reverse, + IL_BeforeBegin, + node, + 0 /*ix*/); + } + + public: + IteratorLocation location() const { return location_; } + BpLeafNodePtrType node() const { return leafnode_; } + BpLeafNodeItemType const * item_addr() const { return &(leafnode_->lookup_elt(this->ix_)); } + + /* deref iterator; .check_regular() throws std::runtime_error unless .location is IL_Regular */ + NativeContentsType const & operator*() const { + this->check_regular(); + return this->leafnode_->lookup_elt(this->ix_).kv_pair(); + } /*operator**/ + + NativeContentsType const * operator->() const { + return &(this->operator*()); + } /*operator->*/ + + bool is_sentinel() const { return (this->location_ != IL_Regular); } + bool is_dereferenceable() const { return !this->is_sentinel(); } + + /* true iff iterator is dereferenceable, i.e. .location is IL_Regular. + * (was calling misspelled .is_deferenceable(), which names no member) + */ + operator bool() const { return this->is_dereferenceable(); } + + /* iterators compare equal when .location, .leafnode and .ix all match; .dirn is not compared */ + bool operator==(IteratorBase const & x) const { + return (this->location_ == x.location_) && (this->leafnode_ == x.leafnode_) && (this->ix_ == x.ix_); + } + + bool operator!=(IteratorBase const & x) const { + return (this->location_ != x.location_) || (this->leafnode_ != x.leafnode_) || (this->ix_ != x.ix_); + } + + void print(std::ostream & os) const { + using xo::xtag; + + os << ""; + } /*print*/ + + /* pre-increment */ + IteratorBase & operator++() { + return ((this->dirn_ == ID_Forward) + ? this->next_step() + : this->prev_step()); + } /*operator++*/ + + /* pre-decrement */ + IteratorBase & operator--() { + return ((this->dirn_ == ID_Forward) + ? this->prev_step() + : this->next_step()); + } /*operator--*/ + + private: + IteratorBase & next_step() { + switch(this->location_) { + case IL_BeforeBegin: + /* .leafnode is leftmost node in tree */ + this->location_ = IL_Regular; + break; + case IL_Regular: + { + /* #of elts in node, not #of elts in tree! 
*/ + std::size_t n_elt = this->leafnode_->n_elt(); + + if (this->ix_ + 1 < n_elt) { + ++(this->ix_); + } else if (this->leafnode_->next_leafnode()) { + this->leafnode_ = this->leafnode_->next_leafnode(); + this->ix_ = 0; + } else { + /* preserve .leafnode: + * (a) for == comparison w/ .end() iterator + * (b) so we can iterate backwards from end position + */ + //this->leafnode_ = this->leafnode_->next_leafnode(); + this->location_ = IL_AfterEnd; + this->ix_ = 0; + } + } + break; + case IL_AfterEnd: + break; + } + + return *this; + } /*next_step*/ + + IteratorBase & prev_step() { + switch(this->location_) { + case IL_BeforeBegin: + break; + case IL_Regular: + if (this->ix_ > 0) { + --(this->ix_); + } else if (this->leafnode_->prev_leafnode()) { + this->leafnode_ = this->leafnode_->prev_leafnode(); + this->ix_ = this->leafnode_->n_elt() - 1; + } else /* .ix == 0 && .leafnode.prev_leafnode == nullptr */ { + /* preserve .leafnode: + * (a) for == comparison w/ .prebegin() iterator + * (b) so iterator is reversible; can iterate forwards from prebegin position + */ + this->location_ = IL_BeforeBegin; + } + break; + case IL_AfterEnd: + /* .leafnode is rightmost node in tree */ + this->location_ = IL_Regular; + this->ix_ = this->leafnode_->n_elt() - 1; + break; + } + + return *this; + } /*prev_step*/ + + private: + void check_regular() const { + using xo::tostr; + using xo::xtag; + + if (this->location_ != IL_Regular) { + throw std::runtime_error(tostr("bplustree iterator: cannot deref iterator" + " in sentinel state", + xtag("loc", this->location_))); + } + } /*check_regular*/ + + private: + /* ID_Forward forward iterator + * ID_Reverse reverse iterator + */ + IteratorDirection dirn_ = ID_Forward; + /* IL_BeforeBegin | IL_Regular | IL_AfterEnd + * + * operator++ operator++ + * IL_BeforeBegin --------------> IL_Regular --------------> IL_AfterEnd + * /-> -\ + * | | + * \----------------/ + * operator++ + * + * operator-- operator-- + * IL_BeforeBegin <------------- 
IL_Regular <-------------- IL_AfterEnd + * /-- <-\ + * | | + * \-----------------/ + * operator-- + * + * + */ + IteratorLocation location_ = IL_AfterEnd; + /* .location .leafnode + * IL_BeforeBegin BplusTree.leafnode_begin (leftmost leaf node) + * IL_Regular any leaf node reachable from BplusTree.leafnode_begin + * (or equivalently from BplusTree.leafnode_end) + * IL_AfterEnd BplusTree.leafnode_end (rightmost leaf node) + */ + BpLeafNodePtrType leafnode_ = nullptr; + /* index position within .leafnode; + * 0 when .location is IL_BeforeBegin | IL_AfterEnd + */ + std::size_t ix_ = 0; + }; /*IteratorBase*/ + + template + class ConstIterator : public IteratorBase { + public: + using iterator_concept = std::bidirectional_iterator_tag; + + using BpIteratorBase = IteratorBase; + using BpLeafNodePtrType = typename BpIteratorBase::BpLeafNodePtrType; + + public: + ConstIterator() = default; + ConstIterator(IteratorDirection dirn, IteratorLocation loc, BpLeafNodePtrType leaf, std::size_t ix) + : IteratorBase(dirn, loc, leaf, ix) {} + ConstIterator(ConstIterator const & x) = default; + ConstIterator(BpIteratorBase const & x) : BpIteratorBase(x) {} + ConstIterator(BpIteratorBase && x) : BpIteratorBase{std::move(x)} {} + + static ConstIterator prebegin_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::prebegin_aux(leaf); } + static ConstIterator begin_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::begin_aux(leaf); } + static ConstIterator end_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::end_aux(leaf); } + + static ConstIterator rprebegin_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::rprebegin_aux(leaf); } + static ConstIterator rbegin_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::rbegin_aux(leaf); } + static ConstIterator rend_aux(BpLeafNodePtrType leaf) { return BpIteratorBase::rend_aux(leaf); } + + /* pre-increment */ + ConstIterator & operator++() { + BpIteratorBase::operator++(); + return *this; + } /*operator++*/ + + /* post-increment */ + 
ConstIterator operator++(int) { + ConstIterator retval = *this; + + ++(*this); + + return retval; + } /*operator++*/ + + /* pre-decrement */ + ConstIterator & operator--() { + BpIteratorBase::operator--(); + return *this; + } /*operator--*/ + + /* post-decrement */ + ConstIterator operator--(int) { + ConstIterator retval = *this; + + --(*this); + + return retval; + } /*operator--*/ + }; /*ConstIterator*/ + } /*namespace detail*/ + + template + inline std::ostream & + operator<<(std::ostream & os, + detail::IteratorBase const & iter) + { + iter.print(os); + return os; + } /*operator<<*/ + } /*namespace tree*/ +} /*namespace xo*/ + +/* end Iterator.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/IteratorUtil.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/IteratorUtil.hpp new file mode 100644 index 00000000..0a0157af --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/IteratorUtil.hpp @@ -0,0 +1,56 @@ +/* @file IteratorUtil.hpp */ + +#pragma once + +#include + +namespace xo { + namespace tree { + namespace detail { + + enum IteratorDirection { + /* ID_Forward. forward iterator + * ID_Reverse. reverse iterator + */ + ID_Forward, + ID_Reverse + }; /*IteratorDirection*/ + + /* specify iterator location relative to a particular b+ tree node */ + enum IteratorLocation { + /* + * IL_BeforeBegin. if non-empty tree, Iterator.node is the first node + * in the tree (the one with smallest key), + * and iterator refers to the location + * "one before" that first node. + * IL_Regular. iterator refers to member of the tree + * given by Iterator.node + * IL_AfterEnd. if non-empty tree, Iterator.node is the last node + * in the tree (the one with largest key), + * and iterator refers to the location + * "one after" that last node. 
+ */ + IL_BeforeBegin, + IL_Regular, + IL_AfterEnd + }; /*IteratorLocation*/ + + /* human-readable name for an IteratorLocation value; "???" for any unrecognized value */ + static inline char const * iterator_location_descr(IteratorLocation x) { + switch(x) { + case IL_BeforeBegin: return "before-begin"; + case IL_Regular: return "regular"; + case IL_AfterEnd: return "after-end"; + default: return "???"; + } + } /*iterator_location_descr*/ + + inline std::ostream & + operator<<(std::ostream & os, IteratorLocation x) { + os << iterator_location_descr(x); + return os; + } /*operator<<*/ + } /*namespace detail*/ + } /*namespace tree*/ +} /*namespace xo*/ + +/* end IteratorUtil.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/LeafNode.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/LeafNode.hpp new file mode 100644 index 00000000..bcb43469 --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/LeafNode.hpp @@ -0,0 +1,684 @@ +/* @file LeafNode.hpp */ + +#pragma once + +#include "GenericNode.hpp" +#include "xo/indentlog/scope.hpp" +#include + +namespace xo { + namespace tree { + + // ----- LeafNodeItem ----- + + template + using LeafNodeItem = NodeItem; + + /* - define for symmetry with NodeItem + * - LeafNodeItem doesn't contain a child pointer; + * it belongs inside a leaf node, which by definition doesn't have children + */ + template + struct NodeItem { + public: + using ContentsType = std::pair; + + public: + NodeItem() = default; + NodeItem(std::pair kv) : kv_pair_{std::move(kv)} {} + + std::pair const & kv_pair() const { return kv_pair_; } + + Key const & key () const { return kv_pair_.first; } + Value const & value() const { return kv_pair_.second; } + + void assign_value(Value x) { kv_pair_.second = std::move(x); } + + private: + /* key+value pair */ + std::pair kv_pair_; + }; /*NodeItem*/ + + /* struct with same size as LeafNodeItem, but POD + with no ctor/dtor */ + template + using LeafNodeItemPlaceholder = NodeItemPlaceholder; + + template + struct LeafNodeShim : public GenericNode + { + LeafNodeShim(NodeType 
ntype, std::size_t branching_factor) : GenericNode(ntype, branching_factor) {} + + /* ordinal_enabled: LeafNode will provide .size(): inherits+overrides GenericNodeBase.size() */ + }; + + template + struct LeafNodeShim : public GenericNode + { + LeafNodeShim(NodeType ntype, std::size_t branching_factor) : GenericNode(ntype, branching_factor) {} + + /* ordinal_disabled: LeafNode provides LeafNode::size(), but not used */ + + virtual std::size_t size() const = 0; + }; + + // ----- LeafNode ----- + + /* require: + * - Properties.branching_factor() + */ + template + struct LeafNode : public LeafNodeShim { + public: + using GenericNodeType = GenericNode; + using LeafNodeType = LeafNode; + using LeafNodeItemType = LeafNodeItem; + using LeafNodeItemPlaceholderType = LeafNodeItemPlaceholder; + using InternalNodeType = InternalNode; + + using ContentsType = typename LeafNodeItemType::ContentsType; + + public: + virtual ~LeafNode(); + + /* node size in bytes (increases with branching factor) */ + static std::size_t node_sizeof(std::size_t branching_factor); + + /* named ctor idiom. 
enforce heap allocation + unique_ptr wrapper */ + static std::unique_ptr make(std::pair kv_pair, + Properties const & properties); + + /* create+return new leaf node that contains all the items in *src from position [lo_ix, hi_ix), + * after this operation size of *src is reduced by (hi_ix - lo_ix) + */ + static std::unique_ptr annex(std::size_t lo_ix, + std::size_t hi_ix, + LeafNode * src); + + LeafNode * prev_leafnode() const { return prev_leafnode_; } + LeafNode * next_leafnode() const { return next_leafnode_; } + + /* .first: true if key in tree already + * .second: index position of (strict) least upper bound in .elt_v[] + * if .n_elt, key has no upper bound in this node + */ + std::pair find_lub_ix(Key const & key) const; + + LeafNodeItemType & lookup_elt(std::size_t i) { return *(reinterpret_cast(&(this->elt_v_[i]))); } + + LeafNodeItemType const & lookup_elt(std::size_t i) const { return *(reinterpret_cast(&(this->elt_v_[i]))); } + + void assign_leaf_value(std::size_t elt_ix, Value value) { + assert(elt_ix < this->n_elt_); + + this->lookup_elt(elt_ix).assign_value(std::move(value)); + } /*assign_leaf_value*/ + + /* assign preceding leaf node (= LH sibling if share same parent) */ + void assign_prev_leafnode(LeafNode * x) { prev_leafnode_ = x; } + void assign_next_leafnode(LeafNode * x) { next_leafnode_ = x; } + + /* insert new leaf at position ix, associating key -> value + * (shuffle existing elements at ix, ix+1.. 
1 position to the right) + */ + void insert_leaf_item(std::size_t ix, + std::pair const & kv_pair, + bool debug_flag); + + /* remove key,value pair at position ix */ + void remove_leaf(std::size_t ix, bool debug_flag); + + /* prepend n items from left-hand sibling, as new left-most elements + * require: combined #of items must be at most b = branching factor + */ + void prepend_from_lh_sibling(LeafNode * lh, std::size_t n, bool debug_flag); + + /* append n items from right-hand sibling, as new right-most elements + * require: combined #of items must be at most b = branching factor + */ + void append_from_rh_sibling(std::size_t n, LeafNode * rh); + + /* append all rh->n_elt() items of right-hand sibling */ + void append_rh_sibling(LeafNode * rh) { this->append_from_rh_sibling(rh->n_elt(), rh); } + + /* returns new leaf with lower half of original element vector; + * original updated to retain upper half + */ + std::unique_ptr split_leaf_lower(); + + /* returns new leaf with upper half of original element vector; + * original updated to retain lower half + */ + std::unique_ptr split_leaf_upper(); + + /* memory for LeafNode instances is always created using new[], + * so required to use delete[] to deallocate + */ + void operator delete (void * mem) noexcept { ::operator delete[](mem); } + + // ----- Inherited from GenericNode ----- + + virtual std::size_t size() const override { return this->n_elt(); } + + virtual Key const & glb_key() const override { return this->lookup_elt(0).key(); } + + virtual std::size_t verify_helper(InternalNodeType const * parent, + bool with_lub_flag, + Key const & lub_key, + LeafNodeType const * lh_leaf, + LeafNodeType const * rh_leaf) const override; + virtual void verify_glb_key(Key const & key) const override; + virtual FindNodeResult find_min_leaf_node() override; + virtual FindNodeResult find_max_leaf_node() override; + + virtual void notify_remove() override; + + private: + explicit LeafNode(std::size_t branching_factor); + + LeafNode(std::pair const & kv_pair, + std::size_t 
branching_factor); + + void assign_siblings(LeafNode * prev, LeafNode * next); + + private: + /* previous LeafNode in key order, immediately before (all the keys in) this node. + * use to streamline inorder traversal. + */ + LeafNode * prev_leafnode_ = nullptr; + /* next LeafNode in key order, immediately after (all the keys in) this node. + * streamline inorder traversal. + */ + LeafNode * next_leafnode_ = nullptr; + /* flexible array; actual capacity will be Properties.branching_factor(); + * but only members [0 .. n_elt-1] are defined. + * + * actual type of .elt_v[i] is LeafNodeItem; + * need to use POD LeafNodeItemPlaceholder to satisfy flexible-array rules + */ + LeafNodeItemPlaceholderType elt_v_[]; + }; /*LeafNode*/ + + template + LeafNode::~LeafNode() { + /* since we're using flexible array for .elt_v[], need to manually run destructors */ + for (std::size_t i=0, n=this->branching_factor_; ilookup_elt(i).~LeafNodeItemType(); + } + + /* hygiene */ + this->n_elt_ = 0; + this->branching_factor_ = 0; + } /*dtor*/ + + template + std::size_t + LeafNode::node_sizeof(std::size_t branching_factor) { + /* since we're using flexible array for .elt_v[], need to manually account for it's allocated size */ + + return (sizeof(LeafNode) + + (branching_factor + * sizeof(LeafNodeItem))); + } /*node_sizeof*/ + + template + std::unique_ptr> + LeafNode::make(std::pair kv_pair, + Properties const & properties) + { + using xo::scope; + using xo::xtag; + + std::size_t mem_z = node_sizeof(properties.branching_factor()); + /* storage for LeafNode, including storage cost for flexible array LeafNode.elt_v[] */ + std::uint8_t * mem = new std::uint8_t[mem_z]; + +#ifdef NOT_IN_USE + scope x("LeafNode.make"); + x.log(xtag("sizeof(LeafNode)", sizeof(LeafNode)), + xtag("bf", properties.branching_factor()), + xtag("mem_z", mem_z), + xtag("mem", (void *)mem)); +#endif + + return std::unique_ptr(new (mem) LeafNode(std::move(kv_pair), + properties.branching_factor())); + } /*make*/ + + 
template + std::unique_ptr> + LeafNode::annex(std::size_t lo_ix, + std::size_t hi_ix, + LeafNode * src) + { + using xo::scope; + using xo::xtag; + + std::size_t branching_factor = src->branching_factor(); + + assert(hi_ix >= lo_ix); + assert(hi_ix - lo_ix <= branching_factor); + + std::size_t mem_z = node_sizeof(branching_factor); + std::uint8_t * mem = new std::uint8_t[mem_z]; + +#ifdef NOT_IN_USE + scope x("LeafNode.annex"); + x.log(xtag("sizeof(LeafNode)", sizeof(LeafNode)), + xtag("bf", branching_factor), + xtag("mem_z", mem_z), + xtag("mem", (void *)mem)); +#endif + + std::unique_ptr new_node(new (mem) LeafNode(branching_factor)); + + std::size_t old_n = src->n_elt(); + + new_node->n_elt_ = hi_ix - lo_ix; + + std::size_t n_annex = hi_ix - lo_ix; + + /* annexing from *src into new_node */ + for (std::size_t i = 0; i < n_annex; ++i) { + LeafNodeItemType & new_slot = new_node->lookup_elt(i); + + new_slot = std::move(src->lookup_elt(lo_ix + i)); + } + + /* shuffle over any remaining items in *src starting from hi_ix */ + for (std::size_t i = lo_ix; i + n_annex < old_n; ++i) { + LeafNodeItemType & slot = src->lookup_elt(i); + + slot = std::move(src->lookup_elt(i + n_annex)); + } + + src->n_elt_ = old_n - n_annex; + + if (lo_ix == 0) { + /* new node builds by taking leftmost elements from src + * -> new node becomes src's predecessor + */ + new_node->assign_siblings(src->prev_leafnode(), src); + } else { + /* new node builds by taking rightmost elements from src + * -> new node becomes src's successor + */ + new_node->assign_siblings(src, src->next_leafnode()); + } + + return new_node; + } /*annex*/ + + template + std::pair + LeafNode::find_lub_ix(Key const & key) const { + if (key < this->lookup_elt(0).key()) + return std::make_pair(false, 0); + + /* promise: return value >= 0 */ + + /* .elt_v[0 .. 
n_elt-1] are maintained in sorted key order */ + std::size_t lo = 0; + std::size_t hi = this->n_elt_; + + while (lo + 1 < hi) { + /* desired child item will be in range [lo, hi) */ + + std::size_t mid = lo + (hi - lo) / 2; + + if (key < this->lookup_elt(mid).key()) + hi = mid; + else + lo = mid; + } + + /* invariant: + * - lo is a valid index: elt_v[lo].kv_pair reflects outcome of most recent call to BplusTree.insert() + * - .elt_v[lo].key <= key + * - if hi<.n_elt, then key < .elt_v[hi].key + */ + bool presence_flag = (key == this->lookup_elt(lo).key()); + + return std::make_pair(presence_flag, hi); + } /*find_lub_ix*/ + + template + void + LeafNode::insert_leaf_item(std::size_t ix, + std::pair const & kv_pair, + bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("self", this), + xtag("n_elt", this->n_elt()), + xtag("bf", this->branching_factor()), + xtag("ix", ix), + xtag("key", kv_pair.first), + xtag("value", kv_pair.second)); + + if (this->n_elt_ >= this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("LeafNode::insert_leaf: leaf already full", + xtag("leaf.n_elt", this->n_elt()), + xtag("branching_factor", this->branching_factor()))); + } + + std::size_t pos_ix = this->n_elt_; + + while (pos_ix > ix) { + //scope x1("loop"); + //x1.log(xtag("pos_ix", pos_ix)); + + this->lookup_elt(pos_ix) = std::move(this->lookup_elt(pos_ix - 1)); + --pos_ix; + } + + ++(this->n_elt_); + this->lookup_elt(ix) = LeafNodeItemType(kv_pair); + + log.end_scope(); + } /*insert_leaf*/ + + template + void + LeafNode::remove_leaf(std::size_t ix, bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("self", this), + xtag("n_elt", this->n_elt()), + xtag("bf", this->branching_factor()), + xtag("ix", ix)); + + if (this->n_elt_ == 0) { + throw std::runtime_error(tostr("LeafNode::remove_leaf: leaf already empty", + xtag("leaf.n_elt", this->n_elt()), + xtag("branching_factor", 
this->branching_factor()))); + } + + /* TODO: removal action for position pos_ix (maintain reductions) */ + + std::size_t pos_ix = ix; + std::size_t end_ix = this->n_elt_ - 1; + + while (pos_ix < end_ix) { + //scope x1("loop"); + //x1.log(xtag("pos_ix", pos_ix)); + + this->lookup_elt(pos_ix) = std::move(this->lookup_elt(pos_ix + 1)); + ++pos_ix; + } + + --(this->n_elt_); + } /*remove_leaf*/ + + template + void + LeafNode::prepend_from_lh_sibling(LeafNode * lh, std::size_t n, bool debug_flag) { + using xo::scope; + using xo::xtag; + + scope log(XO_DEBUG(debug_flag), + xtag("n", n)); + + if (this->n_elt() + n > this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("LeafNode.prepend_from_lh_sibling: expected combined #elt <= bf", + xtag("self.n_elt", this->n_elt()), + xtag("n", n), + xtag("bf", this->branching_factor()))); + } + + std::size_t n_lh = lh->n_elt(); + std::size_t n_rh = this->n_elt(); + + /* move elts in *this to the right n steps; walk downward so no slot is overwritten before it is read */ + for (std::size_t ixp1 = n_rh; ixp1 > 0; --ixp1) { + std::size_t ix = ixp1 - 1; + this->lookup_elt(ix + n) = std::move(this->lookup_elt(ix)); + } + + /* xfer (move, not copy) n elts from upper end of lh, to lower end of *this; lh gives these slots up below */ + for (std::size_t ix = 0; ix < n; ++ix) { + this->lookup_elt(ix) = std::move(lh->lookup_elt(n_lh - n + ix)); + } + + this->n_elt_ += n; + lh->n_elt_ -= n; + + /* note: since we didn't create/destroy any LeafNodes, + * .prev_leafnode / .next_leafnode pointers are unchanged + */ + + log.end_scope(); + } /*prepend_from_lh_sibling*/ + + template + void + LeafNode::append_from_rh_sibling(std::size_t n, LeafNode * rh) { + using xo::xtag; + + if (this->n_elt() + n > this->branching_factor()) { + assert(false); + throw std::runtime_error(tostr("LeafNode.append_from_rh_sibling: expected combined #elt <= bf", + xtag("self.n_elt", this->n_elt()), + xtag("n", n), + xtag("bf", this->branching_factor()))); + } + + std::size_t n_lh = this->n_elt(); + + for (std::size_t ix = 0; ix < n; ++ix) { + 
this->lookup_elt(n_lh + ix) = std::move(rh->lookup_elt(ix)); + /* note: leaf items are key,value pairs; + * no parent pointers to fixup (cf InternalNode.append_from_rh_sibling) + */ + } + + this->n_elt_ += n; + + /* shuffle remaining members of rh sibling n items to the left */ + for (std::size_t ix = 0; ix < rh->n_elt() - n; ++ix) { + rh->lookup_elt(ix) = std::move(rh->lookup_elt(ix + n)); + } + + rh->n_elt_ -= n; + + /* note: since we didn't create/destroy any LeafNodes, + * .prev_leafnode / .next_leafnode pointers are unchanged + */ + + } /*append_from_rh_sibling*/ + + template + std::unique_ptr> + LeafNode::split_leaf_lower() { + std::size_t n_elt = this->n_elt_; + std::size_t mid_ix = n_elt / 2; + + return LeafNode::annex(0, mid_ix, this); + } /*split_leaf_lower*/ + + template + std::unique_ptr> + LeafNode::split_leaf_upper() { + std::size_t n_elt = this->n_elt_; + std::size_t mid_ix = n_elt / 2; + + return LeafNode::annex(mid_ix, n_elt, this); + } /*split_leaf_upper*/ + + template + std::size_t + LeafNode::verify_helper(InternalNodeType const * parent, + bool with_lub_flag, + Key const & lub_key, + LeafNodeType const * lh_leaf, + LeafNodeType const * rh_leaf) const { + using xo::xtag; + + /* verify immediate parent pointer is correct */ + if (this->parent() != parent) { + throw std::runtime_error(tostr("LeafNode::verify_helper" + ": expected parent pointer to refer to actual parent", + xtag("stored_parent", this->parent()), + xtag("actual_parent", parent))); + } + + /* verify locally stored keys appear in sorted order */ + std::size_t n = this->n_elt_; + for (std::size_t i=1; i < n; ++i) { + LeafNodeItemType const & prev = this->lookup_elt(i-1); + LeafNodeItemType const & elt = this->lookup_elt(i); + + if (prev.key() < elt.key()) { + ; + } else { + throw std::runtime_error(tostr("LeafNode::verify_helper" + ": expected local keys in strictly increasing order", + xtag("i", i), + xtag("key(i-1)", prev.key()), + xtag("key(i)", elt.key()))); + } + } + + if 
(with_lub_flag) { + if (this->lookup_elt(n-1).key() < lub_key) { + ; + } else { + throw std::runtime_error(tostr("LeafNode::verify_helper" + ": expected last local key before parent-supplied lub key", + xtag("n", n), + xtag("key(n-1)", this->lookup_elt(n-1).key()), + xtag("lub_key", lub_key))); + } + } + + /* verify next/prev leafnode pointers are consistent */ + if ((lh_leaf && (lh_leaf->next_leafnode() != this)) + || (this->prev_leafnode() != lh_leaf)) + { + throw std::runtime_error(tostr("LeafNode::verify_helper" + ": inconsistent prev/next leaf pointers", + xtag("parent", parent), + xtag("lh_leaf", lh_leaf), + xtag("lh_leaf.next", lh_leaf ? lh_leaf->next_leafnode() : nullptr), + xtag("self", this), + xtag("self.prev", this->prev_leafnode()))); + } + + if ((this->next_leafnode() != rh_leaf) + || (rh_leaf && (rh_leaf->prev_leafnode() != this))) + { + throw std::runtime_error(tostr("LeafNode::verify_helper" + ": inconsistent prev/next leaf pointers", + xtag("parent", parent), + xtag("self", this), + xtag("self.next", this->next_leafnode()), + xtag("rh_leaf", rh_leaf), + xtag("rh_leaf.prev", rh_leaf ? 
rh_leaf->prev_leafnode() : nullptr))); + } + + return this->n_elt(); + } /*verify_helper*/ + + template + void + LeafNode::verify_glb_key(Key const & key) const { + using xo::xtag; + + LeafNodeItemType const & elt = this->lookup_elt(0); + + if (elt.key() != key) { + throw std::runtime_error(tostr("LeafNode::verify_glb_key" + ": expected stored greatest-lower-bound key to match leftmost leaf's key", + xtag("@", this), + xtag("reported_key", key), + xtag("actual_key", elt.key()))); + } + } /*verify_glb_key*/ + + template + FindNodeResult> + LeafNode::find_min_leaf_node() { + return FindNodeResult>(0, this); + } /*find_min_leaf_node*/ + + template + FindNodeResult> + LeafNode::find_max_leaf_node() { + return FindNodeResult>(0, this); + } /*c_find_max_leaf_node*/ + + template + void + LeafNode::notify_remove() { + if (this->prev_leafnode_) + this->prev_leafnode_->assign_next_leafnode(this->next_leafnode_); + if (this->next_leafnode_) + this->next_leafnode_->assign_prev_leafnode(this->prev_leafnode_); + } /*notify_remove*/ + + template + LeafNode::LeafNode(std::size_t branching_factor) + : LeafNodeShim(NodeType::leaf, branching_factor) + { + /* must call ctor explicitly for each element. 
+ * compiler can't do this for us, b/c it doesn't know size of flexible array + */ + for (std::size_t i = 0, n = branching_factor; i < n; ++i) { + new (&(this->lookup_elt(i))) LeafNodeItemType(); + } + } + + template + LeafNode::LeafNode(std::pair const & kv_pair, + std::size_t branching_factor) + : LeafNodeShim(NodeType::leaf, branching_factor) + { + using xo::scope; + using xo::xtag; + +#ifdef NOT_USING_DEBUG + scope x("LeafNode.ctor"); +#endif + + this->n_elt_ = 1; + /* since .elt_v[] is a flexible array, need to invoke constructors explicitly + * (compiler doesn't know how many elements there are -> can't do it for us + */ + +#ifdef NOT_USING_DEBUG + x.log(xtag("elt[0]", &(this->lookup_elt(0)))); +#endif + + new (&(this->lookup_elt(0))) LeafNodeItemType(kv_pair); + + for (std::size_t i = 1, n = branching_factor; i < n; ++i) { +#ifdef NOT_USING_DEBUG + x.log(xtag("i", i), + xtag("elt[i]", &(this->lookup_elt(i)))); +#endif + + /* using placement-new to invoke ctor explicitly */ + new (&(this->lookup_elt(i))) LeafNodeItemType(); + } + } /*ctor*/ + + template + void + LeafNode::assign_siblings(LeafNode * p, LeafNode * n) { + if (p) + p->assign_next_leafnode(this); + this->prev_leafnode_ = p; + this->next_leafnode_ = n; + if (n) + n->assign_prev_leafnode(this); + } /*assign_siblings*/ + + } /*namespace tree*/ +} /*namespace xo*/ + + +/* end LeafNode.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/Lhs.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/Lhs.hpp new file mode 100644 index 00000000..3968a76d --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/Lhs.hpp @@ -0,0 +1,68 @@ +/* @file Lhs.hpp */ + +#pragma once + +#include + +namespace xo { + namespace tree { + namespace detail { + /* xo::tree::detail::BplusTreeLhsBase + * + * use for {const + non-const} versions of BplusTree::operator[] + * + * Expect: either: + * Tree = BplusTree + * LeafNodeItem = Tree::LeafNodeItemType + * or + * Tree = BplusTree const + * LeafNodeItem 
= Tree::LeafNodeItemType const + */ + template + class BplusTreeLhsBase { + public: + using mapped_type = typename Tree::mapped_type; + + public: + BplusTreeLhsBase() = default; + BplusTreeLhsBase(Tree * tree, LeafNodeItem * item) + : p_tree_{tree}, item_{item} {} + + operator mapped_type const & () const { + //using xo::tostr; + + if (!this->item_) { + throw std::runtime_error + ("bptree: attempt to use empty lhs object as rvalue"); + } + + return this->item_->value(); + } + + protected: + Tree * p_tree_ = nullptr; + /* points to key-value pair (interior to a B+ tree LeafNode); constness is carried by LeafNodeItem itself */ + LeafNodeItem * item_ = nullptr; + }; /*BplusTreeLhsBase*/ + + /* xo::tree::detail::BplusTreeConstLhs + * + * use for const version of BplusTree::operator[] + */ + template + class BplusTreeConstLhs : public BplusTreeLhsBase + { + public: + BplusTreeConstLhs() = default; + BplusTreeConstLhs(BplusTree const * tree, + typename BplusTree::LeafNodeItemType const * item) + : BplusTreeLhsBase(tree, item) {} + }; /*BplusTreeConstLhs*/ + + } /*namespace detail*/ + } /*namespace tree*/ +} /*namespace xo*/ + +/* end Lhs.hpp */ diff --git a/xo-ordinaltree/include/xo/ordinaltree/bplustree/bplustree_tags.hpp b/xo-ordinaltree/include/xo/ordinaltree/bplustree/bplustree_tags.hpp new file mode 100644 index 00000000..1f589bee --- /dev/null +++ b/xo-ordinaltree/include/xo/ordinaltree/bplustree/bplustree_tags.hpp @@ -0,0 +1,16 @@ +/* @file bplustree_tags.hpp */ + +#pragma once + +namespace xo { + namespace tree { + namespace tags { + /* ordinal_enabled: compute ordinal statistics; + * in particular maintain per-node subtree size + */ + enum ordinal_tag { ordinal_enabled, ordinal_disabled }; + } /*tags*/ + } /*namespace tree*/ +} /*namespace xo*/ + +/* end bplustree_tags.hpp */ diff --git a/xo-ordinaltree/utest/CMakeLists.txt b/xo-ordinaltree/utest/CMakeLists.txt new file mode 100644 index 00000000..30311ce5 --- /dev/null +++ b/xo-ordinaltree/utest/CMakeLists.txt @@ -0,0 +1,25 @@ +# 
ordinaltree/utest/CMakeLists.txt + +# note: tests in this directory use Catch2-provided main +set(SELF_EXE utest.tree) +set(SELF_SOURCE_FILES tree_utest_main.cpp redblacktree.cpp bplustree.cpp) + +add_executable(${SELF_EXE} ${SELF_SOURCE_FILES}) +xo_include_options2(${SELF_EXE}) + +add_test(NAME ${SELF_EXE} COMMAND ${SELF_EXE}) +target_code_coverage(${SELF_EXE} AUTO ALL) + +# ---------------------------------------------------------------- +# internal dependencies: refcnt, ... + +xo_dependency(${SELF_EXE} refcnt) +xo_dependency(${SELF_EXE} indentlog) +xo_dependency(${SELF_EXE} randomgen) + +# ---------------------------------------------------------------- +# 3rd part dependency: catch2: + +xo_external_target_dependency(${SELF_EXE} Catch2 Catch2::Catch2) + +# end ordinaltree/utest/CMakeLists.txt diff --git a/xo-ordinaltree/utest/bplustree.cpp b/xo-ordinaltree/utest/bplustree.cpp new file mode 100644 index 00000000..825f33b4 --- /dev/null +++ b/xo-ordinaltree/utest/bplustree.cpp @@ -0,0 +1,814 @@ +/* @file bplustree.cpp */ + +#define CATCH_CONFIG_ENABLE_BENCHMARKING + +#include "random_tree_ops.hpp" +#include "xo/ordinaltree/BplusTree.hpp" +#include "xo/randomgen/random_seed.hpp" +#include "xo/randomgen/print.hpp" +#include "xo/indentlog/scope.hpp" +#include "catch2/catch.hpp" + +namespace { + using xo::tree::BplusTree; + using xo::tree::BplusStdProperties; + using xo::tree::NullReduce; + using xo::tree::Machdep; + + using xo::rng::Seed; + + using utest::TreeUtil; + + using xo::scope; + //using xo::scope_setup; + using xo::xtag; + + using BtreeKey = int; + using BtreeValue = double; + using BtreeProperties = BplusStdProperties; + //using BtreeProperties = BplusStdProperties; + using BpTree = BplusTree, + BtreeProperties>; + + /* random test data (e.g. permutation of integers [0 .. n-1]). 
+ * will do various tree operations using these permutations to control order + * in which keys are presented + */ + struct RandomTestData { + RandomTestData(std::size_t n, + xo::rng::xoshiro256ss * p_rgen); + + std::vector const & u1v() const { return u1v_; } + std::vector const & u2v() const { return u2v_; } + std::vector const & u12_v() const { return u12_v_; } + + private: + /* a set comprising n randomly chosen elements drawn from [0 .. 2n-1]. + * here n = .u1v.size = .u2v.size + */ + std::vector u1v_; + /* complement of .u1v w.r.t. [0 .. 2n-1] */ + std::vector u2v_; + /* .u1v + .u2v */ + std::vector u12_v_; + }; /*RandomTestData*/ + + RandomTestData::RandomTestData(std::size_t n, + xo::rng::xoshiro256ss * p_rgen) + : u1v_(n), u2v_(n), u12_v_(2*n) + { + /* permutation of [0 .. 2n-1] */ + std::vector u(2*n); + + for (std::uint32_t i=0; i<2*n; ++i) + u[i] = i; + std::shuffle(u.begin(), u.end(), *p_rgen); + + u1v_ = std::vector(u.begin(), u.begin() + n); + u2v_ = std::vector(u.begin() + n, u.end()); + u12_v_ = std::move(u); + } /*ctor*/ + + /* representation-independent feature benchmarks for tree algorithms. + * + * +------------------+ + * |AbstractTestParams| + * +------------------+ + * ^ + * | isa +----------------+ + * +------------|StdMapTestParams| benchmark std::map (bogey!) + * | +----------------+ + * | + * | isa +---------------+ + * +------------|BtreeTestParams| benchmark BplusTree + * +---------------+ + */ + struct AbstractTestParams { + virtual ~AbstractTestParams() = default; + /* insert benchmark: + * 1. prime tree by inserting RandomTestData.u1v (random subset comprising n draws from [0 .. 2n-1]) + * 2. measure cost of inserting RandomTestData.u2v (complement of u1v w.r.t [0 .. 
2n-1]) + */ + virtual void run_insert_benchmark(RandomTestData const & random_testdata) const = 0; + virtual void run_erase_benchmark(RandomTestData const & random_testdata) const = 0; + virtual void run_lookup_benchmark(RandomTestData const & random_testdata) const = 0; + virtual void run_traverse_benchmark(RandomTestData const & random_testdata) const = 0; + }; + + struct StdMapTestParams : public AbstractTestParams { + StdMapTestParams(char const * name) + : test_name_{name} {} + + /* 1. make map containing keys in random_testdata.u1v. + * 2. during construction, interleave inserts against a temporary map, + * to spoil sequential heap allocation (i.e. simulate fragmentation) + */ + std::map make_random_map1(RandomTestData const & random_testdata) const { + std::map tree; + /* 2nd tree to interfere with locality */ + std::map tree2; + + for (std::uint32_t x : random_testdata.u1v()) { + tree.insert({x, 10*x}); + /* 2nd tree to interfere with locality */ + for (std::uint32_t y = 0; y < 8; ++y) + tree2.insert({8*x+y, 10*8*x+y}); + } + + return tree; + } /*make_random_map1*/ + + /* 1. make map containing keys in both random_testdata.u1v + random_testdata.u2v + * 2. during construction, interleave inserts against a temporary map, + * to spoil sequential heap allocation (i.e. 
simulate fragmentation) + */ + std::map make_random_map12(RandomTestData const & random_testdata) const { + std::map tree; + /* temporary tree to interfere with locality */ + std::map tree2; + + for (std::uint32_t x : random_testdata.u12_v()) { + tree.insert({x, 10*x}); + /* 2nd tree to interfere with memory locality */ + for (std::uint32_t y = 0; y < 8; ++y) + tree2.insert({8*x+y, 10*8*x+y}); + } + + return tree; + } /*make_random_map12*/ + + virtual void run_insert_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_erase_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_lookup_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_traverse_benchmark(RandomTestData const & random_testdata) const override; + + char const * test_name_ = nullptr; + }; + + void + StdMapTestParams::run_insert_benchmark(RandomTestData const & random_testdata) const + { + /* see also: BtreeTestParams::run_insert_benchmark() */ + + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + std::map tree + = std::move(this->make_random_map1(random_testdata)); + + /* benchmark additional inserts */ + clock.measure([&](int seq) { + std::size_t key = random_testdata.u2v()[seq % n]; + double value = 10 * key; + + tree.insert({key, value}); + return tree.size(); + }); + }; + } /*run_insert_benchmark*/ + + void + StdMapTestParams::run_erase_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + std::map tree + = std::move(this->make_random_map12(random_testdata));; + + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. 
n-1] + */ + + std::size_t key = random_testdata.u1v()[seq % n]; + + //std::clog << "i=" << i << std::endl; + tree.erase(key); + + return tree.size(); + }); + }; + } /*run_erase_benchmark*/ + + void + StdMapTestParams::run_lookup_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + std::map tree + = std::move(this->make_random_map1(random_testdata)); + + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. n-1] + */ + + std::size_t key = random_testdata.u1v()[seq % n]; + + //std::clog << "i=" << i << std::endl; + double value = tree[key]; + + return value; + }); + }; + } /*run_lookup_benchmark*/ + + void + StdMapTestParams::run_traverse_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + std::map tree + = std::move(this->make_random_map1(random_testdata)); + auto ix = tree.begin(); /* inorder cursor, as in BtreeTestParams::run_traverse_benchmark */ + + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. n-1] + */ + /* tree holds the n keys of u1v: rewind every n steps so ix is never dereferenced at end() */ + if (seq % n == 0) + ix = tree.begin(); + + double value = (ix++)->second; + return value; + }); + }; + } /*run_traverse_benchmark*/ + + struct BtreeTestParams : public AbstractTestParams { + BtreeTestParams(char const * name, std::size_t bf, bool debug_flag) + : test_name_{name}, branching_factor_{bf}, debug_flag_{debug_flag} {} + + BpTree make_empty_bptree() const { + BtreeProperties properties(branching_factor_, + debug_flag_); + return BpTree(properties); + } + + /* 1. make b+ tree containing keys in random_testdata.u1v. 
+ * 2. during constructions, interleave inserts against a temporary b+ tree, + * to spoil sequential heap allocation (i.e. simulate fragmentation) + */ + BpTree make_random_bptree1(RandomTestData const & random_testdata) const { + BpTree bptree = this->make_empty_bptree(); + /* 2nd tree, just to spoil memory locality */ + BpTree bptree2 = this->make_empty_bptree(); + + for (std::uint32_t x : random_testdata.u1v()) { + bptree.insert(BpTree::value_type(x, 10 * x)); + /* 2nd tree to interfere with locality */ + for (std::uint32_t y = 0; y < 8; ++y) { + bptree2.insert(BpTree::value_type(8*x+y, 10 * (8*x+y))); + } + } + + return bptree; + } /*make_random_bptree1*/ + + BpTree make_random_bptree12(RandomTestData const & random_testdata) const { + BpTree bptree = this->make_empty_bptree(); + /* 2nd tree, just to spoil memory locality */ + BpTree bptree2 = this->make_empty_bptree(); + + for (std::uint32_t x : random_testdata.u12_v()) { + bptree.insert(BpTree::value_type(x, 10 * x)); + /* 2nd tree to interfere with locality */ + for (std::uint32_t y = 0; y < 8; ++y) { + bptree2.insert(BpTree::value_type(8*x+y, 10 * (8*x+y))); + } + } + + return bptree; + } /*make_random_bptree12*/ + + void run_unit_test(xo::rng::xoshiro256ss * p_rgen) const; + + virtual void run_insert_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_erase_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_lookup_benchmark(RandomTestData const & random_testdata) const override; + virtual void run_traverse_benchmark(RandomTestData const & random_testdata) const override; + + /* test (or benchmark) name -- 1st argument to catch2 TEST_CASE() / BENCHMARK() / SECTION() macro */ + char const * test_name_ = nullptr; + /* exercise B+ tree with this branching factor */ + std::size_t branching_factor_ = 0; + /* for benchmarks only: if true enable verbose logging of B+ tree operations. 
otherwise not used */ + bool debug_flag_ = false; + }; /*BtreeTestParams*/ + + void + BtreeTestParams::run_unit_test(xo::rng::xoshiro256ss * p_rgen) const + { + std::size_t branching_factor = this->branching_factor_; + + /* perform a series of tests with increasing scale */ + for (std::uint32_t n = 0; n <= 1024;) { + if (n == 0) { + bool ok_flag = false; + + for (std::uint32_t attention = 0; !ok_flag && (attention < 2); ++attention) { + ok_flag = true; + + bool debug_flag = (attention == 1); + + BtreeProperties properties(branching_factor, + debug_flag); + BpTree bptree(properties); + + scope log(XO_DEBUG2(debug_flag, "bptree"), + xtag("vm_page_size", Machdep::get_page_size()), + xtag("branching_factor", bptree.branching_factor()), + xtag("leaf_node_size", sizeof(BpTree::LeafNodeType)), + xtag("internal_node_size", sizeof(BpTree::InternalNodeType))); + + REQUIRE_ORCAPTURE(ok_flag, debug_flag, bptree.size() == 0); + REQUIRE_ORCAPTURE(ok_flag, debug_flag, bptree.verify_ok(true) == true); + + log && log(xtag("size", n)); + + ok_flag &= TreeUtil::check_bidirectional_iterator(0 /*dvalue - not used*/, + debug_flag, + bptree); + + ok_flag &= TreeUtil::test_clear(debug_flag, &bptree); + + log.end_scope(); + } + } else { + /* for each tree size, do multiple trials; + * choosing different pseudorandom key order for each trial + */ + for (std::uint32_t trial = 0; trial < 10; ++trial) { + /* repeated trials with different rng state */ + + bool ok_flag = false; + + for (std::uint32_t attention = 0; !ok_flag && (attention < 2); ++attention) { + ok_flag = true; + + /* attention=0: + * - no logging + * - detect assertion failures, but don't report them to catch + * attention=1: + * - only runs if failure detected with attention=0 + * - full logging + * - report to catch + */ + + bool debug_flag = (attention == 1); + + BtreeProperties properties(branching_factor, + debug_flag); + BpTree bptree(properties); + + scope log(XO_DEBUG2(debug_flag, "bptree"), + xtag("vm_page_size", 
Machdep::get_page_size()), + xtag("branching_factor", bptree.branching_factor()), + xtag("leaf_node_size", sizeof(BpTree::LeafNodeType)), + xtag("internal_node_size", sizeof(BpTree::InternalNodeType))); + + REQUIRE_ORCAPTURE(ok_flag, debug_flag, bptree.size() == 0); + REQUIRE_ORCAPTURE(ok_flag, debug_flag, bptree.verify_ok(true) == true); + + log && log(xtag("size", n), xtag("trial", trial)); + + /* insert [0..n-1] in random order */ + ok_flag &= TreeUtil::random_inserts(n, debug_flag, p_rgen, &bptree); + + /* verification problem -> print tree */ + log && log(xtag("bptree", (char const *)"...")); + if (log) bptree.print(std::cout, log.nesting_level() + 2); + + try { + REQUIRE_ORCAPTURE(ok_flag, debug_flag, bptree.verify_ok(debug_flag)); + } catch(std::exception & ex) { + log && log(xtag("exception", ex.what())); + } + + if (properties.ordinal_enabled()) { + ok_flag &= TreeUtil::check_ordinal_lookup(0 /*dvalue*/, + debug_flag, + bptree); + } + + /* verify inorder traverse, using iterator api */ + ok_flag &= TreeUtil::check_bidirectional_iterator(0, + debug_flag, + bptree); + + ok_flag &= TreeUtil::random_lookups(debug_flag, + bptree, + p_rgen); + + if (properties.ordinal_enabled()) { + /* paranoid check that iteration / random_lookups didn't somehow disturb tree */ + ok_flag &= TreeUtil::check_ordinal_lookup(0 /*dvalue*/, + debug_flag, + bptree); + } + + /* TODO: + * - check_reduced_sum() + * - check_ordinal_lookup() + * - check_bidirectional_iterator() + * - random_updates() + * - check_ordinal_lookup() + * - check_bidirectional_iterator() + * - check_reduced_sum() + */ + + /* remove [0..n-1] in random order */ + ok_flag &= TreeUtil::random_removes(debug_flag, p_rgen, &bptree); + + /* insert [0..n-1] again, so we can test .clear() */ + ok_flag &= TreeUtil::random_inserts(n, debug_flag, p_rgen, &bptree); + + ok_flag &= TreeUtil::test_clear(debug_flag, &bptree); + + log.end_scope(); + } /*loop over attention value*/ + } /*loop over trial#*/ + } + + if (n == 0) + n = 
1; + else + n = 2*n; + } + } /*run_unit_test*/ + + void + BtreeTestParams::run_insert_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + BpTree bptree = std::move(this->make_random_bptree1(random_testdata)); + + /* benchmark additional inserts (don't want to benchmark on empty tree) */ + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. n-1] + */ + + std::size_t key = random_testdata.u2v()[seq % n]; + double value = 10 * key; + + bptree.insert(BpTree::value_type(key, value)); + + return bptree.size(); + }); + }; + } /*run_insert_benchmark*/ + + void + BtreeTestParams::run_erase_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + /* b+ tree with 2n elements */ + BpTree bptree = std::move(this->make_random_bptree12(random_testdata)); + + /* measure time to remove n elements */ + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. 
n-1] + */ + + //std::clog << "i=" << i << std::endl; + bptree.erase(random_testdata.u1v()[seq % n]); + + return bptree.size(); + }); + }; + } /*run_erase_benchmark*/ + + void + BtreeTestParams::run_lookup_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + BpTree bptree = std::move(this->make_random_bptree1(random_testdata)); + + /* benchmark random lookups */ + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. n-1] + */ + + std::size_t key = random_testdata.u1v()[seq % n]; + + double value = bptree[key]; + + return value; + }); + }; + } /*run_lookup_benchmark*/ + + void + BtreeTestParams::run_traverse_benchmark(RandomTestData const & random_testdata) const + { + BENCHMARK_ADVANCED(this->test_name_)(Catch::Benchmark::Chronometer clock) + { + std::size_t n = random_testdata.u1v().size(); + + BpTree bptree = std::move(this->make_random_bptree1(random_testdata)); + + /* benchmark traverse */ + BpTree::const_iterator ix = bptree.begin(); + + clock.measure([&](int seq) { + /* catch2 decides how many times to run this lambda, + * in effort to get statistically valid sample. + * + * If it calls lambda n times, then seq will increase from [0 .. 
n-1] + */ + + if (seq % n == 0) + ix = bptree.begin(); + + return ix++; + }); + }; + } /*run_traverse_benchmark*/ + + TEST_CASE("bptree", "[bplustree]") { + uint64_t seed = 14950349842636922572UL; + /* can reseed from /dev/random with: */ + //Seed seed; + + auto rgen = xo::rng::xoshiro256ss(seed); + + /* exercise multiple branching factors */ + std::array const params_v + = {{ + BtreeTestParams("bf=4", + 4 /*branching_factor*/, + false /*debug_flag - not used*/), + BtreeTestParams("bf=12", + 12 /*branching_factor*/, + false /*debug_flag - not used*/), + BtreeTestParams("bf=28", + 28 /*branching_factor*/, + false /*debug_flag - not used*/), + BtreeTestParams("bf=60", + 60 /*branching_factor*/, + false /*debug_flag - not used*/) + }}; + + for (std::uint32_t i_pm = 0; i_pm < params_v.size(); ++i_pm) { + SECTION(params_v[i_pm].test_name_) { + params_v[i_pm].run_unit_test(&rgen); + } + } + } /*TEST_CASE(bptree)*/ + + /* to run: + * $ ./utest.tree [!benchmark] + * + * looks like ospage4 (1k nodes) gets best performance + */ + TEST_CASE("bptree-benchmark", "[!benchmark]") { + using BtreeProperties = BplusStdProperties; + + /* 2 cache lines per node (though note that we're not aligning nodes on cacheline boundaries) */ + std::size_t const c_cacheline_branching_factor = 4; // BtreeProperties::default_cacheline_branching_factor(); + std::size_t const c_ospage16_branching_factor = BtreeProperties::branching_factor_for_size(Machdep::get_page_size() / 16); + std::size_t const c_ospage8_branching_factor = BtreeProperties::branching_factor_for_size(Machdep::get_page_size() / 8); + std::size_t const c_ospage4_branching_factor = BtreeProperties::branching_factor_for_size(Machdep::get_page_size() / 4); + std::size_t const c_ospage2_branching_factor = BtreeProperties::branching_factor_for_size(Machdep::get_page_size() / 2); + std::size_t const c_ospage1_branching_factor = BtreeProperties::branching_factor_for_size(Machdep::get_page_size()); + + /* random seed -- we don't need 
deterministic behavior for benchmarking, unless we encounter internal logic error */ + //std::uint64_t seed = 17372468046414980217UL; + Seed seed; + + auto rgen = xo::rng::xoshiro256ss(seed); + + constexpr bool c_debug_flag = false; + + /* n keys [0 .. n-1] */ + std::uint32_t n = 25000; + + RandomTestData random_testdata(n, &rgen); + +#ifdef OBSOLETE + /* random permutation of [0..n-1] */ + std::vector u(n); + { + for (std::uint32_t i=0; i u2(n); + { + for (std::uint32_t i=0; i, 7> const params_v + = {{ + std::unique_ptr(new StdMapTestParams("std-map-insert")), + std::unique_ptr(new BtreeTestParams("bplus-min-insert", + c_cacheline_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage16-insert", + c_ospage16_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage8-insert", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage4-insert", + c_ospage4_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage2-insert", + c_ospage2_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage-insert", + c_ospage1_branching_factor, + false)) + }}; + + /* note: w/cacheline: + * getting 593ms for 10^6 inserts; + * i.e. ~593ns each + * w/ospage: + * getting 188ms for 10^6 inserts; + * i.e. 
~188ns each + * (with ospage size 4k -> branching factor 252) + */ + for(std::uint32_t i_bm = 0; i_bm < params_v.size(); ++i_bm) { + params_v[i_bm]->run_insert_benchmark(random_testdata); + } + } + + { + std::array, 7> const params_v + = {{ + std::unique_ptr(new StdMapTestParams("std-map-erase")), + std::unique_ptr(new BtreeTestParams("bplus-min-remove", + c_cacheline_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage16-remove", + c_ospage16_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage8-remove", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage4-remove", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage2-remove", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage1-remove", + c_ospage1_branching_factor, + false)) + }}; + + /* note: cacheline: getting 72us for 10^2 removes; + * i.e. ~7.2ns each + * + * ospage: getting 243us for 10^4 removes; + * i.e. 
~24ns each + */ + for (std::uint32_t i_bm = 0; i_bm < params_v.size(); ++i_bm) { + params_v[i_bm]->run_erase_benchmark(random_testdata); + } + } + + { + std::array, 7> const params_v + = {{ + std::unique_ptr(new StdMapTestParams("std-map-lookup")), + std::unique_ptr(new BtreeTestParams("bplus-min-lookup", + c_cacheline_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage16-lookup", + c_ospage16_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage8-lookup", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage4-lookup", + c_ospage4_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage2-lookup", + c_ospage2_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage1-lookup", + c_ospage1_branching_factor, + false)) + }}; + + /* note: cacheline: + * getting 850us for 10^4 lookups; + * -> ~85ns each + * ospage: + * getting 585us for 10^4 lookups; + * -> ~58ns each + */ + for (std::uint32_t i_bm = 0; i_bm < params_v.size(); ++i_bm) { + params_v[i_bm]->run_lookup_benchmark(random_testdata); + } + } + + { + std::array, 7> const params_v + = {{ + std::unique_ptr(new StdMapTestParams("std-map-traverse")), + std::unique_ptr(new BtreeTestParams("bplus-min-traverse", + c_cacheline_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage16-traverse", + c_ospage16_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage8-traverse", + c_ospage8_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage4-traverse", + c_ospage4_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage2-traverse", + c_ospage2_branching_factor, + false)), + std::unique_ptr(new BtreeTestParams("bplus-ospage1-traverse", + c_ospage1_branching_factor, + false)) + }}; + + /* note: cacheline: getting 25us to traverse tree of size 10^4 + * -> ~2.5ns each + * note: ospage: 
getting 6us to traverse tree of size 10^4 + * -> ~0.6ns each + */ + for (std::uint32_t i_bm = 0; i_bm < params_v.size(); ++i_bm) { + params_v[i_bm]->run_traverse_benchmark(random_testdata); + } + } + + } /*TEST_CASE(bptree-benchmark)*/ +} /*namespace*/ + +/* end bplustree.cpp */ diff --git a/xo-ordinaltree/utest/random_tree_ops.hpp b/xo-ordinaltree/utest/random_tree_ops.hpp new file mode 100644 index 00000000..0c6d898f --- /dev/null +++ b/xo-ordinaltree/utest/random_tree_ops.hpp @@ -0,0 +1,450 @@ +/* @file random_tree_ops.hpp **/ + +#include "xo/randomgen/xoshiro256.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/tag.hpp" +#include "xo/indentlog/print/vector.hpp" +#include "catch2/catch.hpp" +#include +#include +#include + +namespace utest { + struct Util { + /* generate vector with integers [0.. n-1] */ + static std::vector vector_upto(std::uint32_t n) { + std::vector u(n); + for (std::uint32_t i = 0; i < n; ++i) + u[i] = i; + + return u; + } /*vector_upto*/ + + static std::map + map_upto(std::uint32_t n) + { + std::map m; + for(std::uint32_t i=0; i + random_permutation(uint32_t n, xo::rng::xoshiro256ss *p_rgen) { + /* vector [0 .. 
n-1] */ + std::vector u = vector_upto(n); + + /* shuffle to get unpredictable permutation */ + std::shuffle(u.begin(), u.end(), *p_rgen); + + return u; + } /*random_permutation*/ + }; /*Util*/ + +/* note: trivial REQUIRE() call in else branch bc we still want + * catch2 to count assertions when verification succeeds + */ +# define REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr) \ + if (catch_flag) { \ + REQUIRE((expr)); \ + } else { \ + REQUIRE(true); \ + ok_flag &= (expr); \ + } + +# define REQUIRE_ORFAIL(ok_flag, catch_flag, expr) \ + REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr); \ + if (!ok_flag) \ + return ok_flag + + + template + struct TreeUtil : public Util { + static bool + test_clear(bool catch_flag, + Tree * p_tree) + { + bool ok_flag = true; + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok()); + + p_tree->clear(); + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok(catch_flag)); + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->empty()); + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->size() == 0); + + return ok_flag; + } /*test_clear*/ + + /* do n random inserts (taken from *p_rgen) into *p_rbtreẹ + * inserted keys will be distinct values in [0, .., n-1] + */ + static bool + random_inserts(std::uint32_t n, + bool catch_flag, + xo::rng::xoshiro256ss * p_rgen, + Tree * p_tree) + { + using xo::xtag; + + bool ok_flag = true; + + xo::scope log(XO_DEBUG(catch_flag)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok()); + + /* n keys 0..n-1 */ + std::vector u(n); + for(std::uint32_t i=0; iinsert(typename Tree::value_type(x, 10 * x)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok(catch_flag)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, insert_result.second); + + /* verify: iterator returned by Treẹinsert(), refers to inserted key,value pair */ + log && log(xtag("iter.node", insert_result.first.node())); + REQUIRE_ORFAIL(ok_flag, catch_flag, insert_result.first->first == x); + REQUIRE_ORFAIL(ok_flag, catch_flag, 
insert_result.first->second == 10 * x); + + ++i; + } + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->size() == n); + + return ok_flag; + } /*random_inserts*/ + + /* do n random removes (taken from *p_rgen) from *p_rbtree; + * assumes *p_rbtree has keys [0 .. n-1] where n=p_rbtreẹsize + */ + static bool + random_removes(bool catch_flag, + xo::rng::xoshiro256ss * p_rgen, + Tree * p_tree) + { + using xo::scope; + using xo::xtag; + + bool ok_flag = true; + + xo::scope log(XO_DEBUG(catch_flag)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok(catch_flag)); + + uint32_t n = p_tree->size(); + + /* random permutation of keys in *p_tree */ + std::vector u + = random_permutation(n, p_rgen); + + log && log(xtag("remove-order", u)); + + /* will keep track of which keys remain as we move them */ + std::map m = Util::map_upto(n); + + /* remove keys in permutation order */ + std::uint32_t i = 1; + for (std::uint32_t x : u) { + log && log("iter i: removing key from n-node tree", + xtag("i", i), xtag("key", x), xtag("n", n)); + + /* remove x from tracking map m also */ + m.erase(x); + + log && log("remove key :iter ", i, "/", n, xtag("key", x)); + + p_tree->erase(x); + // rbtreẹdisplay(); + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->size() == n-i); + /* amongst other things, this guarantees that keys in *p_tree + * appear in increasing order + */ + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->verify_ok(catch_flag)); + +#ifdef NOT_YET + /* 1. rbtree should now contain all the keys in [0..n-1], + * with u[0]..u[i-1] excluded; this is the same as the + * contents of m. 
+ */ + auto m_ix = m.begin(); + auto m_end_ix = m.end(); + auto visitor_fn = + ([&m_ix, m_end_ix] + (std::pair const & contents) + { + REQUIRE(m_ix != m_end_ix); + REQUIRE(contents.first == m_ix->second); + ++m_ix; + }); + p_tree->visit_inorder(visitor_fn); +#endif + ++i; + } + + REQUIRE_ORFAIL(ok_flag, catch_flag, m.empty()); + REQUIRE_ORFAIL(ok_flag, catch_flag, p_tree->size() == 0); + + log.end_scope(); + + return ok_flag; + } /*random_removes*/ + + /* Require: + * - tree has keys [0..n-1], where n=treẹsize() + * - for each key k, associated value is 10*k + */ + static bool + random_lookups(bool catch_flag, + Tree const & tree, + xo::rng::xoshiro256ss * p_rgen) + { + using xo::scope; + using xo::xtag; + + xo::scope log(XO_DEBUG(catch_flag)); + + /* -> false if/when verification fails */ + bool ok_flag = true; + + REQUIRE_ORFAIL(ok_flag, catch_flag, tree.verify_ok(catch_flag)); + + size_t n = tree.size(); + std::vector u + = random_permutation(n, p_rgen); + + /* lookup keys in permutation order */ + std::uint32_t i = 1; + for (std::uint32_t x : u) { + INFO(tostr(xtag("i", i), xtag("n", n), xtag("x", x))); + + REQUIRE_ORFAIL(ok_flag, catch_flag, tree[x] == x*10); + REQUIRE_ORFAIL(ok_flag, catch_flag, tree.verify_ok(catch_flag)); + REQUIRE_ORFAIL(ok_flag, catch_flag, tree.size() == n); + + /* also test treẹfind() */ + auto find_ix = tree.find(x); + + REQUIRE_ORFAIL(ok_flag, catch_flag, find_ix != tree.end()); + REQUIRE_ORFAIL(ok_flag, catch_flag, find_ix->first == x); + REQUIRE_ORFAIL(ok_flag, catch_flag, find_ix->second == x*10); + + ++i; + } + + REQUIRE_ORFAIL(ok_flag, catch_flag, tree.size() == n); + + log.end_scope(); + + return ok_flag; + } /*random_lookups*/ + + /* Require: + * - tree has keys [0..n-1], where n=treẹsize() + * - tree value at key k is dvalue+10*k + */ + static bool + check_ordinal_lookup(std::uint32_t dvalue, + bool catch_flag, + Tree const & tree) + { + using xo::scope; + using xo::xtag; + + /* -> false if/when verification fails */ + bool 
ok_flag = true; + + xo::scope log(XO_DEBUG(catch_flag)); + + std::size_t const n = tree.size(); + std::size_t i = 0; + + log && log("tree with size n", xtag("n", n)); + + for (std::size_t i=0; ifirst == i)); + REQUIRE_ORFAIL(ok_flag, catch_flag, (ix->second == 10*i + dvalue)); + } + + log.end_scope(); + + return ok_flag; + } /*check_ordinal_lookup*/ + + /* Require: + * - tree has keys [0..n-1], where n=treẹsize() + * - tree values at key k is dvalue+10*k + * + * catch_flag. true -> log to console + interact with catch2 + * false -> verify iteration behavior for return code + */ + static bool + check_bidirectional_iterator(uint32_t dvalue, + bool catch_flag, + Tree const & tree) + { + using xo::scope; + using xo::xtag; + + /* -> false if/when verification fails */ + bool ok_flag = true; + + std::size_t const n = tree.size(); + + xo::scope log(XO_DEBUG(catch_flag)); + + log && log("tree with size n", xtag("n", n)); + + { + std::size_t i = 0; + + auto end_ix = tree.end(); + + log && log(xtag("end_ix", end_ix)); + + auto begin_ix = tree.begin(); + auto ix = begin_ix; + + int last_key = -1; + + while (ix != end_ix) { + log && log("forward loop top", + xtag("i", i), + xtag("ix", ix)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, ix->first == i); + REQUIRE_ORFAIL(ok_flag, catch_flag, ix->second == dvalue + 10*i); + if(i > 0) { + REQUIRE_ORFAIL(ok_flag, catch_flag, ix->first > last_key); + } + last_key = ix->first; + ++i; + ++ix; + + log && log("forward loop bottom", + xtag("last_key", last_key), + xtag("next ix", ix)); + } + + /* should have visited exactly n locations */ + REQUIRE_ORFAIL(ok_flag, catch_flag, i == n); + REQUIRE_ORFAIL(ok_flag, catch_flag, ix == end_ix); + + log && log(xtag("ix", ix), xtag("begin_ix", begin_ix)); + + /* now run iterator backwards, + * starting from "one past the end" + */ + if(ix != begin_ix) { + do { + --i; + --ix; + + log && log("forward backup", + xtag("i", i), + xtag("ix", ix)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, 
ix.is_dereferenceable()); + + log && log(xtag("ix.first", (*ix).first)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, (*ix).first == i); + } while (ix != begin_ix); + } + + /* should have visited exactly n locations in reverse */ + REQUIRE_ORFAIL(ok_flag, catch_flag, i == 0); + } + + /* ----- reverse iterators ----- */ + + { + std::int64_t i = n - 1; + + auto rbegin_ix = tree.rbegin(); + auto rend_ix = tree.rend(); + + auto rix = rbegin_ix; + + int last_key = -1; + + while (rix != rend_ix) { + log && log("reverse loop top", + xtag("i", i), + xtag("rix", rix)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, rix->first == i); + REQUIRE_ORFAIL(ok_flag, catch_flag, rix->second == dvalue + 10*i); + if (i < n-1) { + REQUIRE_ORFAIL(ok_flag, catch_flag, rix->first < last_key); + } + last_key = rix->first; + --i; + ++rix; + + log && log("reverse loop bottom", + xtag("last_key", last_key), + xtag("next ix", rix)); + } + + /* should have visited exactly n locations */ + REQUIRE_ORFAIL(ok_flag, catch_flag, i == -1); + + log && log(xtag("rbegin_ix", rbegin_ix)); + + /* now run reverse iterator backwrds, + * starting from "one before the beginning" + */ + if (rix != rbegin_ix) { + do { + ++i; + --rix; + + log && log("reverse backup", + xtag("i", i), + xtag("rix", rix), + xtag("rix.first", rix->first)); + + REQUIRE_ORFAIL(ok_flag, catch_flag, (*rix).first == i); + } while (rix != rbegin_ix); + } + + /* should have visited exactly n locations in reversê2 */ + REQUIRE_ORFAIL(ok_flag, catch_flag, i == n - 1); + } + + log.end_scope(); + + return ok_flag; + } /*check_bidirectional_iterator*/ + }; /*TreeUtil*/ +} /*namespace utest*/ + +/* end random_tree_ops.hpp */ diff --git a/xo-ordinaltree/utest/redblacktree.cpp b/xo-ordinaltree/utest/redblacktree.cpp new file mode 100644 index 00000000..f33db805 --- /dev/null +++ b/xo-ordinaltree/utest/redblacktree.cpp @@ -0,0 +1,248 @@ +/* @file redblacktree.cpp */ + +#include "random_tree_ops.hpp" +#include "xo/ordinaltree/RedBlackTree.hpp" +#include + 
+namespace { + using xo::tree::RedBlackTree; + using xo::tree::SumReduce; + using xo::tree::OrdinalReduce; + using xo::tree::NullReduce; + using xo::rng::xoshiro256ss; + + using utest::Util; + using utest::TreeUtil; + + using xo::scope; + using xo::scope_setup; + using xo::xtag; + + //using RbTree = RedBlackTree>; + using RbTree = RedBlackTree>; + +#ifdef OBSOLETE + /* Require: + * - rbtree has keys [0..n-1] where n=rbtree.size(), + * - rbtree value at key k is dvalue+10*k + */ + void + check_ordinal_lookup(uint32_t dvalue, + RbTree const & rbtree) + { + size_t const n = rbtree.size(); + size_t i = 0; + + for(size_t i=0; ifirst == i); + } + } /*check_ordinal_lookup*/ +#endif + + /* check that RedBlackTree<>::find_sum_glb() works as advertised. + * + * partial sums of v[j] for j<=i will be: + * + * (i+1) . i + * 10 . --------- + ((i+1) . dvalue) + * 2 + * + * = (i+1).(5.i + dvalue) + * + * Require: + * - rbtree has keys [0..n-1], where n=rbtree.size() + * - rbtree value at key k is dvalue+10*k + */ + void + check_reduced_sum(uint32_t dvalue, + RbTree const & rbtree) + { + size_t const n = rbtree.size(); + + for(size_t i = 0; i < n; ++i) { + /* compute reduction up to key=i */ + double reduced_upto + = rbtree.reduce_lub(i /*key*/, + true /*is_closed*/); + + double reduced = (i+1) * (5*i + dvalue); + + INFO(tostr(xtag("i", i), xtag("n", n), + xtag("tree.reduced_upto", reduced_upto), + xtag("reduced", reduced), + xtag("dvalue", dvalue))); + + auto glb_ix = rbtree.cfind_sum_glb(reduced); + + REQUIRE(reduced_upto == reduced); + + REQUIRE(glb_ix.is_dereferenceable()); + /* glb_ix is truth-y */ + REQUIRE(glb_ix); + + REQUIRE(glb_ix->first == i); + } + } /*check_reduced_sum*/ + +#ifdef OBSOLETE + /* Require: + * - *p_rbtree has keys [0..n-1], where n=rbtree.size() + * - for each key k, associated value is 10*k + */ + void + random_lookups(RbTree const & rbtree, + xoshiro256ss * p_rgen) + { + REQUIRE(rbtree.verify_ok()); + + size_t n = rbtree.size(); + std::vector u + = 
Util::random_permutation(n, p_rgen); + + /* lookup keys in permutation order */ + uint32_t i = 1; + for (uint32_t x : u) { + INFO(tostr(xtag("i", i), xtag("n", n), xtag("x", x))); + + REQUIRE(rbtree[x] == x*10); + REQUIRE(rbtree.verify_ok()); + REQUIRE(rbtree.size() == n); + ++i; + } + + REQUIRE(rbtree.size() == n); + } /*random_lookups*/ +#endif + + /* Require: + * - *p_rbtree has keys [0..n-1], where n=rbtree.size() + * - for each key k, associated value is 10*k + * + * Promise: + * - for each key k, associated value is dvalue + 10*k + */ + void + random_updates(uint32_t dvalue, + RbTree * p_rbtree, + xoshiro256ss * p_rgen) + { + REQUIRE(p_rbtree->verify_ok()); + + std::size_t n = p_rbtree->size(); + std::vector u + = Util::random_permutation(n, p_rgen); + + /* update key/value pairs in permutation order */ + uint32_t i = 1; + for (uint32_t x : u) { + REQUIRE((*p_rbtree)[x] == x*10); + + (*p_rbtree)[x] = dvalue + 10*x; + + REQUIRE((*p_rbtree)[x] == dvalue + 10*x); + REQUIRE(p_rbtree->verify_ok()); + /* assignment to existing key does not change tree size */ + REQUIRE(p_rbtree->size() == n); + ++i; + } + + REQUIRE(p_rbtree->size() == n); + } /*random_updates_1*/ + + TEST_CASE("rbtree", "[redblacktree]") { + RbTree rbtree; + + std::uint64_t seed = 14950349842636922572UL; + /* can reseed from /dev/urandom with: */ + //arc4random_buf(&seed, sizeof(seed)); + + auto rgen = xo::rng::xoshiro256ss(seed); + + /* perform a series of tests with increasing scale */ + for(std::uint32_t n=0; n<=1024; ) { + bool ok_flag = false; + + for (std::uint32_t attention = 0; !ok_flag && (attention < 2); ++attention) { + /* attention=0: + * - no logging + * - detect assertion failures, but don't report them to catch2 + * attention=1: + * - only runs if failure detected with attention=0 + * - full logging + * - report to catch + */ + + bool debug_flag = (attention == 1); + + scope log(XO_DEBUG2(debug_flag, "rbtree")); + log && log(xtag("size", n)); + + ok_flag = true; + + if (n == 0) { + 
/* check iteration on empty tree */ + ok_flag &= TreeUtil::check_bidirectional_iterator(0 /*dvalue - not used*/, + debug_flag, + rbtree); + } else { + /* insert [0..n-1] in random order */ + ok_flag &= TreeUtil::random_inserts(n, debug_flag, &rgen, &rbtree); + + /* TODO: generalize remaining helpers; share with bplustree unit test */ + + /* check iterator traverses [0..n-1] in both directions (using ++ and --) */ + ok_flag &= TreeUtil::check_ordinal_lookup(0 /*dvalue*/, + debug_flag, + rbtree); + /* verify end-to-end iteration */ + ok_flag &= TreeUtil::check_bidirectional_iterator(0, + debug_flag, + rbtree); + /* verify behavior of .reduce_lub(), .find_sum_glb() */ + check_reduced_sum(0, rbtree); + /* verify behavior of read-only variant of operator[] */ + ok_flag &= TreeUtil::random_lookups(debug_flag, + rbtree, + &rgen); + + /* verify that lookups didn't somehow disturb tree contents */ + ok_flag &= TreeUtil::check_ordinal_lookup(0 /*dvalue*/, + debug_flag, + rbtree); + + ok_flag &= TreeUtil::check_bidirectional_iterator(0, + debug_flag, + rbtree); + /* verify update via read/write operator[] */ + random_updates(10000, &rbtree, &rgen); + + /* verify that updates changed tree contents in expected way */ + ok_flag &= TreeUtil::check_ordinal_lookup(10000 /*dvalue*/, + debug_flag, + rbtree); + + /* verify end-to-end iteration */ + ok_flag &= TreeUtil::check_bidirectional_iterator(10000, + debug_flag, + rbtree); + /* verify behavior of .reduce_lub(), .find_sum_glb() */ + check_reduced_sum(10000, rbtree); + /* verify behavior of read/write variant of operator[] */ + ok_flag &= TreeUtil::random_removes(debug_flag, &rgen, &rbtree); + } + + log.end_scope(); + } + + if (n == 0) + n = 1; + else + n = 2*n; + } + } /*TEST_CASE(rbtree)*/ +} /*namespace*/ + +/* end redblacktree.cpp */ diff --git a/xo-ordinaltree/utest/tree_utest_main.cpp b/xo-ordinaltree/utest/tree_utest_main.cpp new file mode 100644 index 00000000..73ae4bab --- /dev/null +++ 
b/xo-ordinaltree/utest/tree_utest_main.cpp @@ -0,0 +1,7 @@ +/* @file tree_utest_main.cpp */ + +#define CATCH_CONFIG_MAIN +#define CATCH_CONFIG_ENABLE_BENCHMARKING +#include "catch2/catch.hpp" + +/* end tree_utest_main.cpp */