diff --git a/xo-arena/utest/DArenaHashMap.test.cpp b/xo-arena/utest/DArenaHashMap.test.cpp index 28da62cf..0962def5 100644 --- a/xo-arena/utest/DArenaHashMap.test.cpp +++ b/xo-arena/utest/DArenaHashMap.test.cpp @@ -274,6 +274,24 @@ namespace xo { REQUIRE(map[999] == 999); } + TEST_CASE("DArenaHashMap-string_view-key", "[arena][DArenaHashMap]") + { + using HashMap = DArenaHashMap; + + HashMap map(1024); + + map["hello"] = 42; + REQUIRE(map.size() == 1); + REQUIRE(map.verify_ok()); + + map["world"] = 100; + REQUIRE(map.size() == 2); + REQUIRE(map.verify_ok()); + + REQUIRE(map["hello"] == 42); + REQUIRE(map["world"] == 100); + } + // TODO: // - let's try getting lcov to work in xo-umbrella2 } diff --git a/xo-expression2/CMakeLists.txt b/xo-expression2/CMakeLists.txt index df5a0c56..0c6419c8 100644 --- a/xo-expression2/CMakeLists.txt +++ b/xo-expression2/CMakeLists.txt @@ -20,7 +20,7 @@ add_definitions(${PROJECT_CXX_FLAGS}) # ---------------------------------------------------------------- # output targets -#add_subdirectory(utest) +add_subdirectory(utest) # note: manual target; generated code committed to git xo_add_genfacet( diff --git a/xo-expression2/include/xo/expression2/StringTable.hpp b/xo-expression2/include/xo/expression2/StringTable.hpp new file mode 100644 index 00000000..9413a01a --- /dev/null +++ b/xo-expression2/include/xo/expression2/StringTable.hpp @@ -0,0 +1,56 @@ +/** @file StringTable.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include +#include +#include +#include + +namespace xo { + namespace scm { + + /** @class StringTable + * @brief table containing a set of interned strings + * + * A table of strings referenced in schematika expressions + **/ + class StringTable { + public: + using DArena = xo::mm::DArena; + using StringMap = xo::map::DArenaHashMap; + using size_type = StringMap::size_type; + + public: + StringTable(size_type hint_max_capacity, + bool debug_flag = false); + + /** lookup interned string; nullptr if 
not present **/ + const DString * lookup(std::string_view key) const; + + /** return the unique interned string with contents @p key, storing a copy on first use. Idempotent: equal keys yield the same pointer **/ + const DString * intern(std::string_view key); + + /** verify StringTable invariants. + * Act on failure according to policy @p p + **/ + bool verify_ok(verify_policy p = verify_policy::throw_only()) const; + + private: + /** allocate string storage in this arena; use DString to represent each string. + * Can't use DArenaVector b/c DString has variable size + **/ + DArena strings_; + /** map_[s] points to the interned DString with contents s; key chars and value both live in @ref strings_ (checked by verify_ok) **/ + StringMap map_; + }; + + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end StringTable.hpp */ diff --git a/xo-expression2/src/expression2/CMakeLists.txt b/xo-expression2/src/expression2/CMakeLists.txt index 956d405a..79dcecd0 100644 --- a/xo-expression2/src/expression2/CMakeLists.txt +++ b/xo-expression2/src/expression2/CMakeLists.txt @@ -6,6 +6,7 @@ set(SELF_SRCS TypeRef.cpp IExpression_Any.cpp IExpression_DConstant.cpp + StringTable.cpp expression2_register_facets.cpp ) diff --git a/xo-expression2/src/expression2/StringTable.cpp b/xo-expression2/src/expression2/StringTable.cpp new file mode 100644 index 00000000..73acf273 --- /dev/null +++ b/xo-expression2/src/expression2/StringTable.cpp @@ -0,0 +1,135 @@ +/** @file StringTable.cpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include "StringTable.hpp" +#include +#include + +namespace xo { + using xo::mm::ArenaConfig; + using xo::mm::AAllocator; + using xo::facet::with_facet; + using xo::facet::obj; + + namespace scm { + StringTable::StringTable(size_type hint_max_capacity, + bool debug_flag) + : strings_{DArena::map(ArenaConfig{.name_ = "strings", + .size_ = hint_max_capacity})}, + map_{hint_max_capacity} + { + (void)debug_flag; /* debug_flag is accepted but not used here */ + } + + const DString * + StringTable::lookup(std::string_view key) const + { + auto ix = map_.find(key); + + if (ix != map_.end()) + return ix->second; + + return nullptr; + } + + const 
DString * + StringTable::intern(std::string_view key) + { + // 1a. lookup key in map_. + // 1b. if present, return existing DString* + + auto ix = map_.find(key); + + if (ix != map_.end()) + return ix->second; + + // 2. otherwise need to add (steps 2a-2c below), then + // + // 2d. return address of the interned copy + + // 2a. allocate DString copy 'interned' of key in strings_ + auto mm = with_facet::mkobj(&strings_); + DString * interned = DString::from_view(mm, key); + + assert(interned); + if (interned) { // re-checked here: assert() compiles away under NDEBUG + // 2b. make string_view from *interned + std::string_view interned_key = std::string_view(*interned); + + // interned_key has same lifetime as StringTable, + // we can use it in map_ + + // 2c. store address of 'interned' in map_ + auto & slot = this->map_[interned_key]; + + slot = interned; + + return slot; + } + + return nullptr; // DString::from_view returned null + } + + bool + StringTable::verify_ok(verify_policy policy) const + { + using xo::scope; + using xo::xtag; + + constexpr const char * c_self = "StringTable::verify_ok"; + scope log(XO_DEBUG(false)); + + /* ST1: underlying hash map passes its invariants */ + if (!map_.verify_ok(policy)) { + return policy.report_error(log, + c_self, ": map_.verify_ok failed"); + } + + /* ST2: for each entry, key points to value's string data */ + for (const auto & kv : map_) { + const std::string_view & key = kv.first; + const DString * value = kv.second; + + /* ST2.1: value is not null */ + if (value == nullptr) { + return policy.report_error(log, + c_self, ": null value in map", + xtag("key", key)); + } + + /* ST2.2: value lies within strings_ arena */ + if (!strings_.contains(value)) { + return policy.report_error(log, + c_self, ": value not in strings_ arena", + xtag("key", key), + xtag("value", (void*)value)); + } + + /* ST2.3: key.data() points to value's chars */ + if (key.data() != value->chars()) { + return policy.report_error(log, + c_self, ": key.data() != value->chars()", + xtag("key", key), + xtag("key.data()", (void*)key.data()), + xtag("value->chars()", 
(void*)value->chars())); + } + + /* ST2.4: key.size() == value->size() */ + if (key.size() != value->size()) { + return policy.report_error(log, + c_self, ": key.size() != value->size()", + xtag("key", key), + xtag("key.size()", key.size()), + xtag("value->size()", value->size())); + } + } + + return true; + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end StringTable.cpp */ diff --git a/xo-expression2/utest/CMakeLists.txt b/xo-expression2/utest/CMakeLists.txt new file mode 100644 index 00000000..e43fd217 --- /dev/null +++ b/xo-expression2/utest/CMakeLists.txt @@ -0,0 +1,11 @@ +# built unittest xo-expression2/utest + +set(UTEST_EXE utest.expression2) +set(UTEST_SRCS + expression2_utest_main.cpp + StringTable.test.cpp +) + +xo_add_utest_executable(${UTEST_EXE} ${UTEST_SRCS}) +xo_self_dependency(${UTEST_EXE} xo_expression2) +xo_external_target_dependency(${UTEST_EXE} Catch2 Catch2::Catch2) diff --git a/xo-expression2/utest/StringTable.test.cpp b/xo-expression2/utest/StringTable.test.cpp new file mode 100644 index 00000000..ef928f77 --- /dev/null +++ b/xo-expression2/utest/StringTable.test.cpp @@ -0,0 +1,160 @@ +/** @file StringTable.test.cpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include +#include +#include + +namespace xo { + using xo::scm::StringTable; + using xo::scm::DString; + + namespace ut { + TEST_CASE("StringTable-lookup-empty", "[expression2][StringTable]") + { + StringTable table(1024); + + // lookup on empty table returns nullptr + REQUIRE(table.lookup("foo") == nullptr); + REQUIRE(table.lookup("") == nullptr); + } + + TEST_CASE("StringTable-intern", "[expression2][StringTable]") + { + StringTable table(1024); + + const DString * s1 = table.intern("hello"); + + REQUIRE(s1 != nullptr); + REQUIRE(std::strcmp(s1->chars(), "hello") == 0); + REQUIRE(s1->size() == 5); + } + + TEST_CASE("StringTable-intern-idempotent", "[expression2][StringTable]") + { + StringTable table(1024); + + const DString * s1 = table.intern("hello"); + const DString 
* s2 = table.intern("hello"); + + // same key returns same pointer + REQUIRE(s1 != nullptr); + REQUIRE(s2 != nullptr); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-lookup-after-intern", "[expression2][StringTable]") + { + StringTable table(1024); + + REQUIRE(table.lookup("hello") == nullptr); + + const DString * s1 = table.intern("hello"); + + const DString * s2 = table.lookup("hello"); + + REQUIRE(s2 != nullptr); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-multiple-strings", "[expression2][StringTable]") + { + StringTable table(1024); + + const DString * s1 = table.intern("apple"); + const DString * s2 = table.intern("banana"); + const DString * s3 = table.intern("cherry"); + + // all different pointers + REQUIRE(s1 != s2); + REQUIRE(s2 != s3); + REQUIRE(s1 != s3); + + // correct contents + REQUIRE(std::strcmp(s1->chars(), "apple") == 0); + REQUIRE(std::strcmp(s2->chars(), "banana") == 0); + REQUIRE(std::strcmp(s3->chars(), "cherry") == 0); + + // lookup still works + REQUIRE(table.lookup("apple") == s1); + REQUIRE(table.lookup("banana") == s2); + REQUIRE(table.lookup("cherry") == s3); + REQUIRE(table.lookup("date") == nullptr); + } + + TEST_CASE("StringTable-intern-empty-string", "[expression2][StringTable]") + { + StringTable table(1024); + + const DString * s1 = table.intern(""); + + REQUIRE(s1 != nullptr); + REQUIRE(s1->size() == 0); + REQUIRE(s1->chars()[0] == '\0'); + + // idempotent for empty string too + const DString * s2 = table.intern(""); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-verify_ok", "[expression2][StringTable]") + { + StringTable table(4096); + + { + INFO("1. empty table"); + + // empty table passes verify_ok + REQUIRE(table.verify_ok()); + } + + // after interning strings, still passes + { + INFO("2. intern(hello)"); + + table.intern("hello"); + REQUIRE(table.verify_ok()); + } + + { + INFO("3. intern(world)"); + + table.intern("world"); + REQUIRE(table.verify_ok()); + } + + { + INFO("4. 
intern(foo)"); + + table.intern("foo"); + REQUIRE(table.verify_ok()); + } + + { + INFO("5. intern(bar)"); + + table.intern("bar"); + REQUIRE(table.verify_ok()); + } + + // idempotent intern doesn't break invariants + { + INFO("6. intern(hello)"); + + table.intern("hello"); + REQUIRE(table.verify_ok()); + } + + { + INFO("7. intern(world)"); + + table.intern("world"); + REQUIRE(table.verify_ok()); + } + } + } /*namespace ut*/ +} /*namespace xo*/ + +/* end StringTable.test.cpp */ diff --git a/xo-expression2/utest/expression2_utest_main.cpp b/xo-expression2/utest/expression2_utest_main.cpp new file mode 100644 index 00000000..e13405df --- /dev/null +++ b/xo-expression2/utest/expression2_utest_main.cpp @@ -0,0 +1,6 @@ +/* file expression2_utest_main.cpp */ + +#define CATCH_CONFIG_MAIN +#include "catch2/catch.hpp" + +/* end expression2_utest_main.cpp */