From 0ae4b1528f2b15ac7dbbfdbed09a2c94f9b80c68 Mon Sep 17 00:00:00 2001
From: Roland Conybeare
Date: Fri, 16 Jan 2026 13:05:58 -0500
Subject: [PATCH] xo-expression2: + DUniqueString, use in StringTable

---
NOTE(review): revised copy of the original patch -- DUniqueString.hpp gains
`#pragma once` (matching StringTable.hpp), DUniqueString::_text() uses named,
const-preserving casts, a few one-line doc comments are added and comment
typos are fixed; the line counts below are updated to match.
Text inside <...> (#include targets, template arguments) appears to have been
lost before this copy was made and is NOT reconstructed here -- confirm against
the repository before applying.

 include/xo/expression2/DUniqueString.hpp | 144 +++++++++++++++++++++++
 include/xo/expression2/StringTable.hpp   |   8 +-
 src/expression2/CMakeLists.txt           |   1 +
 src/expression2/DUniqueString.cpp        |  99 ++++++++++++++++
 src/expression2/StringTable.cpp          |  10 +-
 utest/StringTable.test.cpp               |  23 ++--
 6 files changed, 265 insertions(+), 20 deletions(-)
 create mode 100644 include/xo/expression2/DUniqueString.hpp
 create mode 100644 src/expression2/DUniqueString.cpp

diff --git a/include/xo/expression2/DUniqueString.hpp b/include/xo/expression2/DUniqueString.hpp
new file mode 100644
index 00000000..32a26726
--- /dev/null
+++ b/include/xo/expression2/DUniqueString.hpp
@@ -0,0 +1,144 @@
+/** @file DUniqueString.hpp
+*
+ * @author Roland Conybeare, Jan 2026
+ **/
+
+#pragma once
+
+#include
+
+namespace xo {
+    namespace scm {
+        /** @class DUniqueString
+         * @brief unique immutable string
+         *
+         * A DUniqueString is an immutable string stored in a shared StringTable.
+         * It follows that DUniqueStrings at different memory locations
+         * have different contents.
+         *
+         * DUniqueString instances will be created by StringTable (see also).
+         * Application code will not allocate them directly.
+         *
+         * Needs to be gc-aware so that collector knows what to do when it encounters
+         * an obj with a DUniqueString data pointer; such instances
+         * will not be allocated from GC memory
+         **/
+        class DUniqueString {
+        public:
+            using AAllocator = xo::mm::AAllocator;
+            using ACollector = xo::mm::ACollector;
+            using size_type = DString::size_type;
+
+            /* Memory model for a DUniqueString allocated via xo allocator
+             *
+             *   0               8               16      20      24          24+z
+             *   v               v               v       v       v           v
+             *   +---------------+-+-------------+-------+-------+-----------+
+             *   |    header     |u|   padding   |  cap  | size  | text... \0|
+             *   +---------------+-+-------------+-------+-------+-----------+
+             *
+             * Legend
+             *   header   8 byte allocation header
+             *   u        1 byte DUniqueString placeholder (c++ insists)
+             *   padding  7 bytes allocator-imposed padding to 8-byte alignment
+             *   cap      4 bytes DString.capacity
+             *   size     4 bytes DString.size
+             *   text     z bytes DString.size bytes of text (including null)
+             *            In practice followed by padding to 8 byte
+             *            alignment
+             */
+
+            /** @defgroup duniquestring-ctors constructors **/
+            ///@{
+
+            /** not copyable **/
+            DUniqueString(const DUniqueString &) = delete;
+
+            ///@}
+            /** @defgroup duniquestring-methods methods **/
+            ///@{
+
+            /** length and text of this string; both delegate to the paired DString **/
+            size_type size() const noexcept { return _text()->size(); }
+            const char * chars() const noexcept { return _text()->chars(); }
+
+            /** compare unique strings: return n with {n<0, n=0, n>0}
+             * when @p lhs lexicographically {before, at, after} @p rhs
+             **/
+            static int compare(const DUniqueString & lhs, const DUniqueString & rhs) {
+                if (&lhs == &rhs)
+                    return 0;
+
+                return DString::compare(*(lhs._text()), *(rhs._text()));
+            }
+
+            /** hash and string_view conversion, likewise taken from the paired DString **/
+            std::size_t hash() const noexcept { return _text()->hash(); }
+            operator std::string_view() const noexcept { return std::string_view(*_text()); }
+
+            ///@}
+            /** @defgroup duniquestring-gcobject-methods gcobject facet methods **/
+            ///@{
+
+            /** size of this object alone (sizeof(DUniqueString)); paired DString not counted **/
+            std::size_t shallow_size() const noexcept;
+
+            /** clone unique string, using memory from allocator @p mm. **/
+            DUniqueString * shallow_copy(obj mm) const noexcept;
+
+            /** fixup child pointers (trivial for DUniqueString, no gc-owned children) **/
+            std::size_t forward_children(obj gc) noexcept;
+
+            ///@}
+
+        private:
+            /** @defgroup duniquestring-impl-methods implementation methods **/
+            ///@{
+
+            /** default ctor **/
+            DUniqueString() = default;
+
+            /** DString containing actual string content immediately follows DUniqueString
+             * in memory; part of same alloc
+             **/
+            const DString * _text() const noexcept;
+
+            //explicit DUniqueString(const DString * text) : text_{text} {}
+
+            /** create instance using memory from @p mm,
+             * with string contents copied from @p sv
+             **/
+            static DUniqueString * from_view(obj mm,
+                                             std::string_view sv);
+
+            ///@}
+
+            friend class StringTable;
+
+        private:
+#ifdef NOPE
+            /** interned string. Note stringtable memory distinct from gc memory,
+             * so gc will not (and should not) traverse this pointer.
+             **/
+            const DString * text_ = nullptr;
+#endif
+        };
+
+        /* since unique: just compare addresses */
+        inline bool operator==(const DUniqueString & lhs, const DUniqueString & rhs) {
+            return (&lhs == &rhs);
+        }
+
+        /* since unique: just compare addresses **/
+        inline bool operator!=(const DUniqueString & lhs, const DUniqueString & rhs) {
+            return (&lhs != &rhs);
+        }
+
+        /* ordered by contents: delegates to DUniqueString::compare */
+        inline bool operator<=(const DUniqueString & lhs, const DUniqueString & rhs) {
+            return (DUniqueString::compare(lhs, rhs) <= 0);
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end DUniqueString.hpp */
diff --git a/include/xo/expression2/StringTable.hpp b/include/xo/expression2/StringTable.hpp
index 9413a01a..54b0a826 100644
--- a/include/xo/expression2/StringTable.hpp
+++ b/include/xo/expression2/StringTable.hpp
@@ -5,7 +5,7 @@
 
 #pragma once
 
-#include
+#include "DUniqueString.hpp"
 #include
 #include
 #include
@@ -22,7 +22,7 @@ namespace xo {
         public:
             using DArena = xo::mm::DArena;
             using StringMap = xo::map::DArenaHashMap;
+                                                 DUniqueString*>;
             using size_type = StringMap::size_type;
 
         public:
@@ -30,10 +30,10 @@ namespace xo {
                         bool debug_flag = false);
 
             /** lookup interned string; nullptr if not present **/
-            const DString * lookup(std::string_view key) const;
+            const DUniqueString * lookup(std::string_view key) const;
 
             /** return unique string with contents @p key. Idempotent! **/
-            const DString * intern(std::string_view key);
+            const DUniqueString * intern(std::string_view key);
 
             /** verify StringTable invariants.
              * Act on failure according to policy @p p
diff --git a/src/expression2/CMakeLists.txt b/src/expression2/CMakeLists.txt
index 79dcecd0..41700e9c 100644
--- a/src/expression2/CMakeLists.txt
+++ b/src/expression2/CMakeLists.txt
@@ -7,6 +7,7 @@ set(SELF_SRCS
     IExpression_Any.cpp
     IExpression_DConstant.cpp
     StringTable.cpp
+    DUniqueString.cpp
     expression2_register_facets.cpp
 )
 
diff --git a/src/expression2/DUniqueString.cpp b/src/expression2/DUniqueString.cpp
new file mode 100644
index 00000000..3e534b9c
--- /dev/null
+++ b/src/expression2/DUniqueString.cpp
@@ -0,0 +1,99 @@
+/** @file DUniqueString.cpp
+*
+ * @author Roland Conybeare, Jan 2026
+ **/
+
+#include "DUniqueString.hpp"
+#include
+#include
+
+namespace xo {
+    using xo::mm::padding;
+    using xo::facet::typeseq;
+
+    namespace scm {
+        const DString *
+        DUniqueString::_text() const noexcept
+        {
+            // location of paired DString is chosen
+            // by allocator (DArena, probably).
+            //
+            // In general allocator alignment more conservative
+            // than C++ alignment
+            //
+            // Remember also: although DUniqueString has zero members,
+            // C++ requires it to behave as if size at least 1 byte
+            // for iterator consistency
+            // (e.g. because c++ would support iterating over
+            // std::vector)
+            //
+            size_t offset = padding::with_padding(sizeof(*this));
+            assert(offset > 0);
+
+            // named casts: step over the padded placeholder without dropping const from `this`
+            const std::byte * base = reinterpret_cast<const std::byte *>(this);
+            return reinterpret_cast<const DString *>(base + offset);
+        }
+
+        DUniqueString *
+        DUniqueString::from_view(obj mm,
+                                 std::string_view sv)
+        {
+            scope log(XO_DEBUG(false));
+
+            /** fine point: choosing to allocate DUniqueString ahead of DString,
+             * so it comes first in bump allocator
+             **/
+
+            void * mem = mm.super_alloc(typeseq::id(),
+                                        sizeof(DUniqueString));
+            DUniqueString * result = new (mem) DUniqueString();
+
+            /** allocated in memory immediately following @p result.
+             * This optimization saves us one pointer (8 bytes) in DUniqueString
+             * itself, plus one allocation header (8 bytes) for 16 bytes total
+             **/
+            DString * text = DString::from_view_suballoc(mm, sv);
+
+            log && log(xtag("result", result), xtag("result.text", result->_text()), xtag("text", text));
+
+            assert(text);
+            assert(text == result->_text());
+
+            /** must finish super-allocation before next alloc **/
+            mm.sub_alloc(0, true);
+
+            return result;
+        }
+
+        /* size of the DUniqueString object itself; the trailing DString is not counted */
+        size_t
+        DUniqueString::shallow_size() const noexcept
+        {
+            return sizeof(DUniqueString);
+        }
+
+        DUniqueString *
+        DUniqueString::shallow_copy(obj mm) const noexcept
+        {
+            // well-posed, but not expected to be used.
+            assert(false);
+
+            DUniqueString * copy = (DUniqueString *)mm.alloc_copy((std::byte *)this);
+
+            if (copy)
+                *copy = *this;
+
+            return copy;
+        }
+
+        /* collector argument is unused: nothing to forward; reports shallow_size() */
+        size_t
+        DUniqueString::forward_children(obj) noexcept
+        {
+            return shallow_size();
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end DUniqueString.cpp */
diff --git a/src/expression2/StringTable.cpp b/src/expression2/StringTable.cpp
index 73acf273..ae48eea9 100644
--- a/src/expression2/StringTable.cpp
+++ b/src/expression2/StringTable.cpp
@@ -23,7 +23,7 @@ namespace xo {
             (void)debug_flag;
         }
 
-        const DString *
+        const DUniqueString *
         StringTable::lookup(std::string_view key) const
         {
             auto ix = map_.find(key);
@@ -34,7 +34,7 @@ namespace xo {
             return nullptr;
         }
 
-        const DString *
+        const DUniqueString *
         StringTable::intern(std::string_view key)
         {
             // 1a. lookup key in map_.
@@ -49,9 +49,9 @@ namespace xo {
             //
             // 2d. return key2 address
 
-            // 2a. allocate DString copy 'interned' of key in strings_
+            // 2a. allocate DUniqueString copy 'interned' of key in strings_
             auto mm = with_facet::mkobj(&strings_);
-            DString * interned = DString::from_view(mm, key);
+            DUniqueString * interned = DUniqueString::from_view(mm, key);
             assert(interned);
 
             if (interned) {
@@ -90,7 +90,7 @@ namespace xo {
             /* ST2: for each entry, key points to value's string data */
             for (const auto & kv : map_) {
                 const std::string_view & key = kv.first;
-                const DString * value = kv.second;
+                const DUniqueString * value = kv.second;
 
                 /* ST2.1: value is not null */
                 if (value == nullptr) {
diff --git a/utest/StringTable.test.cpp b/utest/StringTable.test.cpp
index ef928f77..66163c22 100644
--- a/utest/StringTable.test.cpp
+++ b/utest/StringTable.test.cpp
@@ -9,7 +9,8 @@
 
 namespace xo {
     using xo::scm::StringTable;
-    using xo::scm::DString;
+    using xo::scm::DUniqueString;
+    //using xo::scm::DString;
 
     namespace ut {
         TEST_CASE("StringTable-lookup-empty", "[expression2][StringTable]")
@@ -25,7 +26,7 @@ namespace xo {
         {
             StringTable table(1024);
 
-            const DString * s1 = table.intern("hello");
+            const DUniqueString * s1 = table.intern("hello");
 
             REQUIRE(s1 != nullptr);
             REQUIRE(std::strcmp(s1->chars(), "hello") == 0);
@@ -36,8 +37,8 @@ namespace xo {
         {
             StringTable table(1024);
 
-            const DString * s1 = table.intern("hello");
-            const DString * s2 = table.intern("hello");
+            const DUniqueString * s1 = table.intern("hello");
+            const DUniqueString * s2 = table.intern("hello");
 
             // same key returns same pointer
             REQUIRE(s1 != nullptr);
@@ -51,9 +52,9 @@ namespace xo {
 
             REQUIRE(table.lookup("hello") == nullptr);
 
-            const DString * s1 = table.intern("hello");
+            const DUniqueString * s1 = table.intern("hello");
 
-            const DString * s2 = table.lookup("hello");
+            const DUniqueString * s2 = table.lookup("hello");
 
             REQUIRE(s2 != nullptr);
             REQUIRE(s1 == s2);
@@ -63,9 +64,9 @@ namespace xo {
         {
             StringTable table(1024);
 
-            const DString * s1 = table.intern("apple");
-            const DString * s2 = table.intern("banana");
-            const DString * s3 = table.intern("cherry");
+            const DUniqueString * s1 = table.intern("apple");
+            const DUniqueString * s2 = table.intern("banana");
+            const DUniqueString * s3 = table.intern("cherry");
 
             // all different pointers
             REQUIRE(s1 != s2);
@@ -88,14 +89,14 @@ namespace xo {
         {
             StringTable table(1024);
 
-            const DString * s1 = table.intern("");
+            const DUniqueString * s1 = table.intern("");
 
             REQUIRE(s1 != nullptr);
             REQUIRE(s1->size() == 0);
             REQUIRE(s1->chars()[0] == '\0');
 
             // idempotent for empty string too
-            const DString * s2 = table.intern("");
+            const DUniqueString * s2 = table.intern("");
             REQUIRE(s1 == s2);
         }
 