From f60f90d8f321ec2ef545b9708ac654613d9d14ef Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Thu, 5 Mar 2026 13:02:12 +1100 Subject: [PATCH] xo-interpreter2 stack: refactor: string clases -> xo-stringtable2/ --- CMakeLists.txt | 44 +++++ idl/IGCObject_DString.json5 | 6 +- idl/IGCObject_DUniqueString.json5 | 18 ++ idl/IPrintable_DString.json5 | 4 +- idl/IPrintable_DUniqueString.json5 | 16 ++ include/xo/stringtable2/DUniqueString.hpp | 140 ++++++++++++++ include/xo/stringtable2/StringTable.hpp | 65 +++++++ include/xo/stringtable2/UniqueString.hpp | 12 ++ .../stringtable2/string/IGCObject_DString.hpp | 2 +- .../uniquestring/IGCObject_DUniqueString.hpp | 67 +++++++ .../uniquestring/IPrintable_DUniqueString.hpp | 62 +++++++ src/stringtable2/CMakeLists.txt | 7 + src/stringtable2/DUniqueString.cpp | 121 ++++++++++++ src/stringtable2/IGCObject_DUniqueString.cpp | 39 ++++ src/stringtable2/IPrintable_DUniqueString.cpp | 28 +++ src/stringtable2/StringTable.cpp | 173 ++++++++++++++++++ .../stringtable2_register_facets.cpp | 4 + .../stringtable2_register_types.cpp | 2 + utest/CMakeLists.txt | 1 + utest/StringTable.test.cpp | 161 ++++++++++++++++ 20 files changed, 966 insertions(+), 6 deletions(-) create mode 100644 idl/IGCObject_DUniqueString.json5 create mode 100644 idl/IPrintable_DUniqueString.json5 create mode 100644 include/xo/stringtable2/DUniqueString.hpp create mode 100644 include/xo/stringtable2/StringTable.hpp create mode 100644 include/xo/stringtable2/UniqueString.hpp create mode 100644 include/xo/stringtable2/uniquestring/IGCObject_DUniqueString.hpp create mode 100644 include/xo/stringtable2/uniquestring/IPrintable_DUniqueString.hpp create mode 100644 src/stringtable2/DUniqueString.cpp create mode 100644 src/stringtable2/IGCObject_DUniqueString.cpp create mode 100644 src/stringtable2/IPrintable_DUniqueString.cpp create mode 100644 src/stringtable2/StringTable.cpp create mode 100644 utest/StringTable.test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 
40f375b..2c2a696 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,50 @@ xo_cxx_toplevel_options3() set(PROJECT_CXX_FLAGS "") add_definitions(${PROJECT_CXX_FLAGS}) +# ---------------------------------------------------------------- + +# note: manual target; generated code committed to git +xo_add_genfacetimpl( + TARGET xo-stringtable2-facetimpl-gcobject-string + FACET_PKG xo_alloc2 + FACET GCObject + REPR String + INPUT idl/IGCObject_DString.json5 +) + +# note: manual target; generated code committed to git +xo_add_genfacetimpl( + TARGET xo-stringtable2-facetimpl-printable-string + FACET_PKG xo_printable2 + FACET Printable + REPR String + INPUT idl/IPrintable_DString.json5 +) + +# ---------------------------------------------------------------- + +# note: manual target; generated code committed to git +xo_add_genfacetimpl( + TARGET xo-stringtable2-facetimpl-gcobject-uniquestring + FACET_PKG xo_alloc2 + FACET GCObject + REPR UniqueString + INPUT idl/IGCObject_DUniqueString.json5 +) + +# note: manual target; generated code committed to git +xo_add_genfacetimpl( + TARGET xo-stringtable2-facetimpl-printable-uniquestring + FACET_PKG xo_printable2 + FACET Printable + REPR UniqueString + INPUT idl/IPrintable_DUniqueString.json5 +) + +# ---------------------------------------------------------------- + +xo_add_genfacet_all(xo-stringtable2-genfacet-all) + # ---------------------------------------------------------------- # output targets diff --git a/idl/IGCObject_DString.json5 b/idl/IGCObject_DString.json5 index caf33fa..6d986cc 100644 --- a/idl/IGCObject_DString.json5 +++ b/idl/IGCObject_DString.json5 @@ -1,10 +1,10 @@ { mode: "implementation", - output_cpp_dir: "src/object2", - output_hpp_dir: "include/xo/object2", + output_cpp_dir: "src/stringtable2", + output_hpp_dir: "include/xo/stringtable2", output_impl_subdir: "string", includes: [ - "", + "", "" ], local_types: [ ], diff --git a/idl/IGCObject_DUniqueString.json5 b/idl/IGCObject_DUniqueString.json5 new 
file mode 100644 index 0000000..a5eb0c6 --- /dev/null +++ b/idl/IGCObject_DUniqueString.json5 @@ -0,0 +1,18 @@ +{ + mode: "implementation", + output_cpp_dir: "src/stringtable2", + output_hpp_dir: "include/xo/stringtable2", + output_impl_subdir: "uniquestring", + includes: [ + "", + "" + ], + local_types: [ ], + namespace1: "xo", + namespace2: "scm", + facet_idl: "idl/GCObject.json5", + brief: "provide AGCObject interface for DUniqueString", + using_doxygen: true, + repr: "DUniqueString", + doc: [ "implement AGCObject for DUniqueString" ], +} diff --git a/idl/IPrintable_DString.json5 b/idl/IPrintable_DString.json5 index 8510938..bbbd3bc 100644 --- a/idl/IPrintable_DString.json5 +++ b/idl/IPrintable_DString.json5 @@ -1,7 +1,7 @@ { mode: "implementation", - output_cpp_dir: "src/object2", - output_hpp_dir: "include/xo/object2", + output_cpp_dir: "src/stringtable2", + output_hpp_dir: "include/xo/stringtable2", output_impl_subdir: "string", includes: [ "", "" ], diff --git a/idl/IPrintable_DUniqueString.json5 b/idl/IPrintable_DUniqueString.json5 new file mode 100644 index 0000000..b6af4eb --- /dev/null +++ b/idl/IPrintable_DUniqueString.json5 @@ -0,0 +1,16 @@ +{ + mode: "implementation", + output_cpp_dir: "src/stringtable2", + output_hpp_dir: "include/xo/stringtable2", + output_impl_subdir: "uniquestring", + includes: [ "", + "" ], + local_types: [ ], + namespace1: "xo", + namespace2: "scm", + facet_idl: "idl/Printable.json5", + brief: "provide APrintable interface for DUniqueString", + using_doxygen: true, + repr: "DUniqueString", + doc: [ "implement APrintable for DUniqueString" ], +} diff --git a/include/xo/stringtable2/DUniqueString.hpp b/include/xo/stringtable2/DUniqueString.hpp new file mode 100644 index 0000000..9f56a9c --- /dev/null +++ b/include/xo/stringtable2/DUniqueString.hpp @@ -0,0 +1,140 @@ +/** @file DUniqueString.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include + +namespace xo { + namespace scm { + /** @class DUniqueString 
+ * @brief unique immutable string + * + * A DUniqueString is an immutable string stored in a shared StringTable. + * It follows that DUniqueStrings at different memory locations + * have different contents. + * + * DUniqueString instances will be created by StringTable (see also). + * Application code will not allocate them directly. + * + * Needs to be gc-aware so that collector knows what to do when it encounters + * an obj with a DUniqueString data pointer; such instances + * will not be allocated from GC memory + **/ + class DUniqueString { + public: + using AAllocator = xo::mm::AAllocator; + using ACollector = xo::mm::ACollector; + using size_type = DString::size_type; + using ppindentinfo = xo::print::ppindentinfo; + + /* Memory model for a DUniqueString allocated via xo allocator + * + * 0 8 16 20 24 24+z + * v v v v v v + * +---------------+-+-------------+-------+-------+-----------+ + * | header |u| padding | cap | size | text... \0| + * +---------------+-+-------------+-------+-------+-----------+ + * + * Legend + * header 8 byte allocation header + * u 1 byte DUniqueString placeholder (c++ insists) + * padding 7 bytes allocator-imposed padding to 8-byte alignment + * cap 4 bytes DString.capacity + * size 4 bytes DString.size + * text z bytes DString.size bytes of text (including null) + * In practice followed by padding to 8 byte + * alignment + */ + + /** @defgroup duniquestring-ctors constructors **/ + ///@{ + + /** not copyable **/ + DUniqueString(const DUniqueString &) = delete; + + ///@} + /** @defgroup duniquestring-methods methods **/ + ///@{ + + /** Available storage for this instance. 
+ * For completeness' sake since uniquestring not modifiable + **/ + size_type capacity() const noexcept { return _text()->capacity(); } + size_type size() const noexcept { return _text()->size(); } + const char * chars() const noexcept { return _text()->chars(); } + + /** compare unique strings: return n with {n<0, n=0, n>0} + * when @p lhs lexicographically {before, at, after} @p rhs + **/ + static int compare(const DUniqueString & lhs, const DUniqueString & rhs); + + std::size_t hash() const noexcept { return _text()->hash(); } + operator std::string_view() const noexcept { return std::string_view(*_text()); } + /** not assignable **/ + DUniqueString & operator=(const DUniqueString &) = delete; + + ///@} + /** @defgroup duniquestring-printable-methods printable facet methods **/ + ///@{ + + bool pretty(const ppindentinfo & ppii) const; + + ///@} + /** @defgroup duniquestring-gcobject-methods gcobject facet methods **/ + ///@{ + + std::size_t shallow_size() const noexcept; + + /** clone unique string, using memory from allocator @p mm. 
**/ + DUniqueString * shallow_copy(obj mm) const noexcept; + + /** fixup child pointers (trivial for DUniqueString, no gc-owned children) **/ + std::size_t forward_children(obj gc) noexcept; + + ///@} + + private: + /** @defgroup duniquestring-impl-methods implementation methods **/ + ///@{ + + /** default ctor **/ + DUniqueString() = default; + + /** DString containing actual string content immediately follows DUniqueString + * in memory; part of same alloc + **/ + DString * _text() const noexcept; + + //explicit DUniqueString(const DString * text) : text_{text} {} + + /** create instance using memory from @p mm, + * with string contents copied from @p sv + **/ + static DUniqueString * from_view(obj mm, + std::string_view sv); + + ///@} + + friend class StringTable; + }; + + /* since unique: just compare addresses */ + inline bool operator==(const DUniqueString & lhs, const DUniqueString & rhs) { + return (&lhs == &rhs); + } + + /* since unique: just compare addresses **/ + inline bool operator!=(const DUniqueString & lhs, const DUniqueString & rhs) { + return (&lhs != &rhs); + } + + inline bool operator<=(const DUniqueString & lhs, const DUniqueString & rhs) { + return (DUniqueString::compare(lhs, rhs) <= 0); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end DUniqueString.hpp */ diff --git a/include/xo/stringtable2/StringTable.hpp b/include/xo/stringtable2/StringTable.hpp new file mode 100644 index 0000000..8d0354c --- /dev/null +++ b/include/xo/stringtable2/StringTable.hpp @@ -0,0 +1,65 @@ +/** @file StringTable.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "DUniqueString.hpp" +#include +#include +#include + +namespace xo { + namespace scm { + + /** @class StringTable + * @brief table containing a set of interned strings + * + * A table of strings referenced in schematika expressions + **/ + class StringTable { + public: + using DArena = xo::mm::DArena; + using MemorySizeVisitor = xo::mm::MemorySizeVisitor; + using 
StringMap = xo::map::DArenaHashMap; + using size_type = StringMap::size_type; + + public: + StringTable(size_type hint_max_capacity, + bool debug_flag = false); + + /** lookup interned string; nullptr if not present **/ + const DUniqueString * lookup(std::string_view key) const; + + /** return unique string with contents @p key. Idempotent! **/ + const DUniqueString * intern(std::string_view key); + + /** generate unique symbol -- guaranteed not to collide + * with existing symbol in this table. + **/ + const DUniqueString * gensym(std::string_view prefix); + + /** verify StringTable invariants. + * Act on failure according to policy @p p + **/ + bool verify_ok(verify_policy p = verify_policy::throw_only()) const; + + /** visit string-table memory pools, call visitor(info) for each **/ + void visit_pools(const MemorySizeVisitor & visitor) const; + + private: + /** allocate string storage in this arena; use DString to represent each string. + * Can't use DArenaVector b/c DString has variable size + **/ + DArena strings_; + /** map_[s] points to arena strings, i.e. 
members of @ref strings_ **/ + StringMap map_; + }; + + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end StringTable.hpp */ diff --git a/include/xo/stringtable2/UniqueString.hpp b/include/xo/stringtable2/UniqueString.hpp new file mode 100644 index 0000000..6dfaadf --- /dev/null +++ b/include/xo/stringtable2/UniqueString.hpp @@ -0,0 +1,12 @@ +/** @file UniqueString.hpp + * + * @author Roland Conybeare, Feb 2026 + **/ + +#pragma once + +#include "DUniqueString.hpp" +#include "uniquestring/IGCObject_DUniqueString.hpp" +#include "uniquestring/IPrintable_DUniqueString.hpp" + +/* end UniqueString.hpp */ diff --git a/include/xo/stringtable2/string/IGCObject_DString.hpp b/include/xo/stringtable2/string/IGCObject_DString.hpp index 2a907f3..6438215 100644 --- a/include/xo/stringtable2/string/IGCObject_DString.hpp +++ b/include/xo/stringtable2/string/IGCObject_DString.hpp @@ -64,4 +64,4 @@ namespace xo { } /*namespace scm*/ } /*namespace xo*/ -/* end */ +/* end */ \ No newline at end of file diff --git a/include/xo/stringtable2/uniquestring/IGCObject_DUniqueString.hpp b/include/xo/stringtable2/uniquestring/IGCObject_DUniqueString.hpp new file mode 100644 index 0000000..2150dec --- /dev/null +++ b/include/xo/stringtable2/uniquestring/IGCObject_DUniqueString.hpp @@ -0,0 +1,67 @@ +/** @file IGCObject_DUniqueString.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [xo-facet/codegen/genfacet] + * arguments: + * --input [idl/IGCObject_DUniqueString.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_repr.hpp.j2] + * 3. 
idl for facet methods + * [idl/IGCObject_DUniqueString.json5] + **/ + +#pragma once + +#include "GCObject.hpp" +#include +#include +#include "DUniqueString.hpp" + +namespace xo { namespace scm { class IGCObject_DUniqueString; } } + +namespace xo { + namespace facet { + template <> + struct FacetImplementation + { + using ImplType = xo::mm::IGCObject_Xfer + ; + }; + } +} + +namespace xo { + namespace scm { + /** @class IGCObject_DUniqueString + **/ + class IGCObject_DUniqueString { + public: + /** @defgroup scm-gcobject-duniquestring-type-traits **/ + ///@{ + using size_type = xo::mm::AGCObject::size_type; + using AAllocator = xo::mm::AGCObject::AAllocator; + using ACollector = xo::mm::AGCObject::ACollector; + using Copaque = xo::mm::AGCObject::Copaque; + using Opaque = xo::mm::AGCObject::Opaque; + ///@} + /** @defgroup scm-gcobject-duniquestring-methods **/ + ///@{ + // const methods + /** memory consumption for this instance **/ + static size_type shallow_size(const DUniqueString & self) noexcept; + /** copy instance using allocator **/ + static Opaque shallow_copy(const DUniqueString & self, obj mm) noexcept; + + // non-const methods + /** during GC: forward immdiate children **/ + static size_type forward_children(DUniqueString & self, obj gc) noexcept; + ///@} + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end */ \ No newline at end of file diff --git a/include/xo/stringtable2/uniquestring/IPrintable_DUniqueString.hpp b/include/xo/stringtable2/uniquestring/IPrintable_DUniqueString.hpp new file mode 100644 index 0000000..a57039a --- /dev/null +++ b/include/xo/stringtable2/uniquestring/IPrintable_DUniqueString.hpp @@ -0,0 +1,62 @@ +/** @file IPrintable_DUniqueString.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [xo-facet/codegen/genfacet] + * arguments: + * --input [idl/IPrintable_DUniqueString.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_repr.hpp.j2] + * 3. 
idl for facet methods + * [idl/IPrintable_DUniqueString.json5] + **/ + +#pragma once + +#include "Printable.hpp" +#include +#include +#include "DUniqueString.hpp" + +namespace xo { namespace scm { class IPrintable_DUniqueString; } } + +namespace xo { + namespace facet { + template <> + struct FacetImplementation + { + using ImplType = xo::print::IPrintable_Xfer + ; + }; + } +} + +namespace xo { + namespace scm { + /** @class IPrintable_DUniqueString + **/ + class IPrintable_DUniqueString { + public: + /** @defgroup scm-printable-duniquestring-type-traits **/ + ///@{ + using ppindentinfo = xo::print::APrintable::ppindentinfo; + using Copaque = xo::print::APrintable::Copaque; + using Opaque = xo::print::APrintable::Opaque; + ///@} + /** @defgroup scm-printable-duniquestring-methods **/ + ///@{ + // const methods + /** Pretty-printing support for this object. +See [xo-indentlog/xo/indentlog/pretty.hpp] **/ + static bool pretty(const DUniqueString & self, const ppindentinfo & ppii); + + // non-const methods + ///@} + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end */ \ No newline at end of file diff --git a/src/stringtable2/CMakeLists.txt b/src/stringtable2/CMakeLists.txt index 5874217..cdf1f22 100644 --- a/src/stringtable2/CMakeLists.txt +++ b/src/stringtable2/CMakeLists.txt @@ -6,9 +6,16 @@ set(SELF_SRCS stringtable2_register_facets.cpp stringtable2_register_types.cpp + StringTable.cpp + DString.cpp IGCObject_DString.cpp IPrintable_DString.cpp + + DUniqueString.cpp + IGCObject_DUniqueString.cpp + IPrintable_DUniqueString.cpp + ) xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) diff --git a/src/stringtable2/DUniqueString.cpp b/src/stringtable2/DUniqueString.cpp new file mode 100644 index 0000000..98a7b33 --- /dev/null +++ b/src/stringtable2/DUniqueString.cpp @@ -0,0 +1,121 @@ +/** @file DUniqueString.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "DUniqueString.hpp" +#include "DString.hpp" +#include 
+#include +#include + +namespace xo { + using xo::mm::padding; + using xo::facet::typeseq; + + namespace scm { + int + DUniqueString::compare(const DUniqueString & lhs, const DUniqueString & rhs) + { + if (&lhs == &rhs) + return 0; + + return DString::compare(*(lhs._text()), *(rhs._text())); + } + + DString * + DUniqueString::_text() const noexcept + { + // location of paired DString is chosen + // by allocator (DArena, probably). + // + // In general allocator alignment is more conservative + // than C++ alignment + // + // Remember also: although DUniqueString has zero members, + // C++ requires it to behave as if size at least 1 byte + // for iterator consistency + // (e.g. because c++ would support iterating over + // std::vector) + // + size_t offset = padding::with_padding(sizeof(*this)); + assert(offset > 0); + + return (DString *)(((std::byte *)this) + offset); + } + + bool + DUniqueString::pretty(const ppindentinfo & ppii) const + { + return _text()->pretty(ppii); + } + + DUniqueString * + DUniqueString::from_view(obj mm, + std::string_view sv) + { + scope log(XO_DEBUG(false)); + + /** fine point: choosing to allocate DUniqueString ahead of DString, + * so it comes first in bump allocator + **/ + + void * mem = mm.super_alloc(typeseq::id(), + sizeof(DUniqueString)); + DUniqueString * result = new (mem) DUniqueString(); + + /** allocated in memory immediately following @p result. 
+ * This optimization saves us one pointer (8 bytes) in DUniqueString + * itself, plus one allocation header (8 bytes) for 16 bytes total + **/ + DString * text = DString::from_view_suballoc(mm, sv); + + log && log(xtag("result", result), xtag("result.text", result->_text()), xtag("text", text)); + + assert(text); + assert(text == result->_text()); + + /** must finish super-allocation before next alloc **/ + mm.sub_alloc(0, true); + + return result; + } + + size_t + DUniqueString::shallow_size() const noexcept + { + return sizeof(DUniqueString); + } + + DUniqueString * + DUniqueString::shallow_copy(obj mm) const noexcept + { + // well-posed, but not expected to be used. + assert(false); + + DUniqueString * copy = (DUniqueString *)mm.alloc_copy((std::byte *)this); + + if (copy) { + // Copy assignment not implemented in general + // *copy = *this; + // in this case *copy already has the same size as *this + + assert(size() <= capacity()); + + strncpy(copy->_text()->data(), + this->_text()->chars(), + this->size()); + } + + return copy; + } + + size_t + DUniqueString::forward_children(obj) noexcept + { + return shallow_size(); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end DUniqueString.cpp */ diff --git a/src/stringtable2/IGCObject_DUniqueString.cpp b/src/stringtable2/IGCObject_DUniqueString.cpp new file mode 100644 index 0000000..b51f0eb --- /dev/null +++ b/src/stringtable2/IGCObject_DUniqueString.cpp @@ -0,0 +1,39 @@ +/** @file IGCObject_DUniqueString.cpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [xo-facet/codegen/genfacet] + * arguments: + * --input [idl/IGCObject_DUniqueString.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3. 
idl for facet methods + * [idl/IGCObject_DUniqueString.json5] +**/ + +#include "uniquestring/IGCObject_DUniqueString.hpp" + +namespace xo { + namespace scm { + auto + IGCObject_DUniqueString::shallow_size(const DUniqueString & self) noexcept -> size_type + { + return self.shallow_size(); + } + + auto + IGCObject_DUniqueString::shallow_copy(const DUniqueString & self, obj mm) noexcept -> Opaque + { + return self.shallow_copy(mm); + } + + auto + IGCObject_DUniqueString::forward_children(DUniqueString & self, obj gc) noexcept -> size_type + { + return self.forward_children(gc); + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end IGCObject_DUniqueString.cpp */ diff --git a/src/stringtable2/IPrintable_DUniqueString.cpp b/src/stringtable2/IPrintable_DUniqueString.cpp new file mode 100644 index 0000000..ef704de --- /dev/null +++ b/src/stringtable2/IPrintable_DUniqueString.cpp @@ -0,0 +1,28 @@ +/** @file IPrintable_DUniqueString.cpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [xo-facet/codegen/genfacet] + * arguments: + * --input [idl/IPrintable_DUniqueString.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3. 
idl for facet methods + * [idl/IPrintable_DUniqueString.json5] +**/ + +#include "uniquestring/IPrintable_DUniqueString.hpp" + +namespace xo { + namespace scm { + auto + IPrintable_DUniqueString::pretty(const DUniqueString & self, const ppindentinfo & ppii) -> bool + { + return self.pretty(ppii); + } + + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end IPrintable_DUniqueString.cpp */ diff --git a/src/stringtable2/StringTable.cpp b/src/stringtable2/StringTable.cpp new file mode 100644 index 0000000..2d23d03 --- /dev/null +++ b/src/stringtable2/StringTable.cpp @@ -0,0 +1,173 @@ +/** @file StringTable.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "StringTable.hpp" +#include +#include + +namespace xo { + using xo::mm::ArenaConfig; + using xo::mm::AAllocator; + using xo::mm::MemorySizeInfo; + using xo::facet::with_facet; + using xo::facet::obj; + + namespace scm { + StringTable::StringTable(size_type hint_max_capacity, + bool debug_flag) + : strings_{DArena::map(ArenaConfig{.name_ = "strings", + .size_ = hint_max_capacity})}, + map_{"stringkeys", hint_max_capacity} + { + (void)debug_flag; + } + + const DUniqueString * + StringTable::lookup(std::string_view key) const + { + auto ix = map_.find(key); + + if (ix != map_.end()) + return ix->second; + + return nullptr; + } + + const DUniqueString * + StringTable::intern(std::string_view key) + { + // 1a. lookup key in map_. + // 1b. if present, return existing DString* + + auto ix = map_.find(key); + + if (ix != map_.end()) + return ix->second; + + // 2. otherwise need to add. + // + // 2d. return key2 address + + // 2a. allocate DUniqueString copy 'interned' of key in strings_ + auto mm = with_facet::mkobj(&strings_); + DUniqueString * interned = DUniqueString::from_view(mm, key); + + assert(interned); + if (interned) { + // 2b. 
make string_view from *interned + std::string_view interned_key = std::string_view(*interned); + + // interned_key has same lifetime as StringTable, + // we can use it in map_ + + // 2c. store address of 'interned' in map_ + auto & slot = this->map_[interned_key]; + + slot = interned; + + return slot; + } + + return nullptr; + } + + const DUniqueString * + StringTable::gensym(std::string_view prefix) + { + static std::size_t s_counter = 0; + /* try "prefix:counter" with increasing counter until a name not yet in this table is found */ + while (true) { + ++s_counter; + /* prefix is a string_view, so it need not be NUL-terminated: print it with %.*s, bounded by its size */ + char buf[80]; + assert(prefix.size() + 20 < sizeof(buf)); + const std::size_t z = (prefix.size() + 20 < sizeof(buf)) ? prefix.size() : sizeof(buf) - 21; /* if assert is compiled out: clamp prefix so the counter digits are never truncated away */ + int n = snprintf(buf, sizeof(buf), + "%.*s:%zu", + int(z), prefix.data(), s_counter); + + if ((0 < n) && (std::size_t(n) < sizeof(buf))) + buf[n] = '\0'; + else + buf[sizeof(buf)-1] = '\0'; + + std::string_view sv(buf); + const DUniqueString * retval = this->lookup(sv); + if (!retval) { + /* not already in string table -> we have viable candidate */ + retval = this->intern(sv); + return retval; + } + } + } + + bool + StringTable::verify_ok(verify_policy policy) const + { + using xo::scope; + using xo::xtag; + + constexpr const char * c_self = "StringTable::verify_ok"; + scope log(XO_DEBUG(false)); + + /* ST1: underlying hash map passes its invariants */ + if (!map_.verify_ok(policy)) { + return policy.report_error(log, + c_self, ": map_.verify_ok failed"); + } + + /* ST2: for each entry, key points to value's string data */ + for (const auto & kv : map_) { + const std::string_view & key = kv.first; + const DUniqueString * value = kv.second; + + /* ST2.1: value is not null */ + if (value == nullptr) { + return policy.report_error(log, + c_self, ": null value in map", + xtag("key", key)); + } + + /* ST2.2: value lies within strings_ arena */ + if (!strings_.contains(value)) { + return policy.report_error(log, + c_self, ": value not in strings_ arena", + xtag("key", key), + xtag("value", (void*)value)); + } + + /* ST2.3: key.data() points to value's chars */ + if (key.data() != value->chars()) { + return policy.report_error(log, + c_self, ": 
key.data() != value->chars()", + xtag("key", key), + xtag("key.data()", (void*)key.data()), + xtag("value->chars()", (void*)value->chars())); + } + + /* ST2.4: key.size() == value->size() */ + if (key.size() != value->size()) { + return policy.report_error(log, + c_self, ": key.size() != value->size()", + xtag("key", key), + xtag("key.size()", key.size()), + xtag("value->size()", value->size())); + } + } + + return true; + } + + void + StringTable::visit_pools(const MemorySizeVisitor & visitor) const + { + strings_.visit_pools(visitor); + map_.visit_pools(visitor); + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end StringTable.cpp */ diff --git a/src/stringtable2/stringtable2_register_facets.cpp b/src/stringtable2/stringtable2_register_facets.cpp index 6a3aa41..e6a97df 100644 --- a/src/stringtable2/stringtable2_register_facets.cpp +++ b/src/stringtable2/stringtable2_register_facets.cpp @@ -5,6 +5,7 @@ #include "stringtable2_register_facets.hpp" +#include #include #include @@ -23,6 +24,9 @@ namespace xo { { scope log(XO_DEBUG(true)); + FacetRegistry::register_impl(); + FacetRegistry::register_impl(); + FacetRegistry::register_impl(); FacetRegistry::register_impl(); diff --git a/src/stringtable2/stringtable2_register_types.cpp b/src/stringtable2/stringtable2_register_types.cpp index fda9074..94afd40 100644 --- a/src/stringtable2/stringtable2_register_types.cpp +++ b/src/stringtable2/stringtable2_register_types.cpp @@ -4,6 +4,7 @@ **/ #include "stringtable2_register_types.hpp" +#include "UniqueString.hpp" #include "String.hpp" //#include @@ -23,6 +24,7 @@ namespace xo { bool ok = true; + ok &= gc.install_type(impl_for()); ok &= gc.install_type(impl_for()); return ok; diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index 1b0b979..46f4a4e 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -3,6 +3,7 @@ set(UTEST_EXE utest.stringtable2) set(UTEST_SRCS stringtable2_utest_main.cpp + StringTable.test.cpp DString.test.cpp StringOps.test.cpp ) diff 
--git a/utest/StringTable.test.cpp b/utest/StringTable.test.cpp new file mode 100644 index 0000000..c880df6 --- /dev/null +++ b/utest/StringTable.test.cpp @@ -0,0 +1,161 @@ +/** @file StringTable.test.cpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include +#include +#include + +namespace xo { + using xo::scm::StringTable; + using xo::scm::DUniqueString; + //using xo::scm::DString; + + namespace ut { + TEST_CASE("StringTable-lookup-empty", "[expression2][StringTable]") + { + StringTable table(1024); + + // lookup on empty table returns nullptr + REQUIRE(table.lookup("foo") == nullptr); + REQUIRE(table.lookup("") == nullptr); + } + + TEST_CASE("StringTable-intern", "[expression2][StringTable]") + { + StringTable table(1024); + + const DUniqueString * s1 = table.intern("hello"); + + REQUIRE(s1 != nullptr); + REQUIRE(std::strcmp(s1->chars(), "hello") == 0); + REQUIRE(s1->size() == 5); + } + + TEST_CASE("StringTable-intern-idempotent", "[expression2][StringTable]") + { + StringTable table(1024); + + const DUniqueString * s1 = table.intern("hello"); + const DUniqueString * s2 = table.intern("hello"); + + // same key returns same pointer + REQUIRE(s1 != nullptr); + REQUIRE(s2 != nullptr); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-lookup-after-intern", "[expression2][StringTable]") + { + StringTable table(1024); + + REQUIRE(table.lookup("hello") == nullptr); + + const DUniqueString * s1 = table.intern("hello"); + + const DUniqueString * s2 = table.lookup("hello"); + + REQUIRE(s2 != nullptr); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-multiple-strings", "[expression2][StringTable]") + { + StringTable table(1024); + + const DUniqueString * s1 = table.intern("apple"); + const DUniqueString * s2 = table.intern("banana"); + const DUniqueString * s3 = table.intern("cherry"); + + // all different pointers + REQUIRE(s1 != s2); + REQUIRE(s2 != s3); + REQUIRE(s1 != s3); + + // correct contents + REQUIRE(std::strcmp(s1->chars(), "apple") == 0); + 
REQUIRE(std::strcmp(s2->chars(), "banana") == 0); + REQUIRE(std::strcmp(s3->chars(), "cherry") == 0); + + // lookup still works + REQUIRE(table.lookup("apple") == s1); + REQUIRE(table.lookup("banana") == s2); + REQUIRE(table.lookup("cherry") == s3); + REQUIRE(table.lookup("date") == nullptr); + } + + TEST_CASE("StringTable-intern-empty-string", "[expression2][StringTable]") + { + StringTable table(1024); + + const DUniqueString * s1 = table.intern(""); + + REQUIRE(s1 != nullptr); + REQUIRE(s1->size() == 0); + REQUIRE(s1->chars()[0] == '\0'); + + // idempotent for empty string too + const DUniqueString * s2 = table.intern(""); + REQUIRE(s1 == s2); + } + + TEST_CASE("StringTable-verify_ok", "[expression2][StringTable]") + { + StringTable table(4096); + + { + INFO("1. empty table"); + + // empty table passes verify_ok + REQUIRE(table.verify_ok()); + } + + // after interning strings, still passes + { + INFO("2. intern(hello)"); + + table.intern("hello"); + REQUIRE(table.verify_ok()); + } + + { + INFO("3. intern(world)"); + + table.intern("world"); + REQUIRE(table.verify_ok()); + } + + { + INFO("4. intern(foo)"); + + table.intern("foo"); + REQUIRE(table.verify_ok()); + } + + { + INFO("5. intern(bar)"); + + table.intern("bar"); + REQUIRE(table.verify_ok()); + } + + // idempotent intern doesn't break invariants + { + INFO("6. intern(hello)"); + + table.intern("hello"); + REQUIRE(table.verify_ok()); + } + + { + INFO("7. intern(world)"); + + table.intern("world"); + REQUIRE(table.verify_ok()); + } + } + } /*namespace ut*/ +} /*namespace xo*/ + +/* end StringTable.test.cpp */