xo-expression2: + DUniqueString, use in StringTable

This commit is contained in:
Roland Conybeare 2026-01-16 13:05:58 -05:00
commit 0ae4b1528f
6 changed files with 255 additions and 20 deletions

View file

@ -0,0 +1,138 @@
/** @file DUniqueString.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once

#include <xo/object2/DString.hpp>
#include <cstddef>
#include <string_view>
namespace xo {
namespace scm {
/** @class DUniqueString
 *  @brief unique immutable string
 *
 *  An immutable string stored in a shared StringTable.
 *  It follows that two DUniqueStrings at different memory locations
 *  have different contents.
 *
 *  Instances are created by StringTable (see also); construction is
 *  private and StringTable is the only friend, so application code
 *  does not allocate them directly.
 *
 *  The type needs to be gc-aware so that the collector knows what to do
 *  when it encounters an obj<AGCObject> with a DUniqueString data pointer;
 *  such instances will not be allocated from GC memory.
 **/
class DUniqueString {
public:
    using AAllocator = xo::mm::AAllocator;
    using ACollector = xo::mm::ACollector;
    using size_type = DString::size_type;

    /* Memory model for a DUniqueString allocated via xo allocator
     *
     *   0               8               16      20      24          24+z
     *   v               v               v       v       v           v
     *   +---------------+-+-------------+-------+-------+-----------+
     *   |     header    |u|   padding   |  cap  |  size | text... \0|
     *   +---------------+-+-------------+-------+-------+-----------+
     *
     * Legend
     *   header   8 bytes  allocation header
     *   u        1 byte   DUniqueString placeholder (c++ insists)
     *   padding  7 bytes  allocator-imposed padding to 8-byte alignment
     *   cap      4 bytes  DString.capacity
     *   size     4 bytes  DString.size
     *   text     z bytes  DString.size bytes of text (including null).
     *                     In practice followed by padding to 8-byte
     *                     alignment
     */

    /** @defgroup duniquestring-ctors constructors **/
    ///@{

    /** not copy-constructible **/
    DUniqueString(const DUniqueString &) = delete;

    ///@}
    /** @defgroup duniquestring-methods methods **/
    ///@{

    /** size of the string, as reported by the trailing DString **/
    size_type size() const noexcept { return _text()->size(); }
    /** character data, as reported by the trailing DString **/
    const char * chars() const noexcept { return _text()->chars(); }

    /** three-way comparison of unique strings.
     *  Returns n with {n<0, n=0, n>0} when @p lhs is lexicographically
     *  {before, at, after} @p rhs.
     **/
    static int compare(const DUniqueString & lhs, const DUniqueString & rhs) {
        /* same object => equal; skips the content comparison */
        return (&lhs == &rhs)
            ? 0
            : DString::compare(*(lhs._text()), *(rhs._text()));
    }

    /** hash of the string, delegated to the trailing DString **/
    std::size_t hash() const noexcept { return _text()->hash(); }

    /** view over the string, built from the trailing DString **/
    operator std::string_view() const noexcept { return std::string_view(*_text()); }

    ///@}
    /** @defgroup duniquestring-gcobject-methods gcobject facet methods **/
    ///@{

    std::size_t shallow_size() const noexcept;
    /** clone unique string, using memory from allocator @p mm. **/
    DUniqueString * shallow_copy(obj<AAllocator> mm) const noexcept;
    /** fixup child pointers (trivial for DUniqueString: no gc-owned children) **/
    std::size_t forward_children(obj<ACollector> gc) noexcept;

    ///@}

private:
    /** @defgroup duniquestring-impl-methods implementation methods **/
    ///@{

    /** default ctor; private, reachable only through StringTable **/
    DUniqueString() = default;

    /** DString holding the actual string content. It immediately follows
     *  the DUniqueString in memory, as part of the same allocation.
     **/
    const DString * _text() const noexcept;

    //explicit DUniqueString(const DString * text) : text_{text} {}

    /** create instance using memory from @p mm,
     *  with string contents copied from @p sv
     **/
    static DUniqueString * from_view(obj<AAllocator> mm,
                                     std::string_view sv);

    ///@}

    friend class StringTable;

#ifdef NOPE
    /** interned string. Note stringtable memory distinct from gc memory,
     *  so gc will not (and should not) traverse this pointer.
     **/
    const DString * text_ = nullptr;
#endif
};
/* Unique strings are interned, so equality is identity:
 * two references are equal exactly when they name the same object.
 */
inline bool operator==(const DUniqueString & lhs, const DUniqueString & rhs) {
    const DUniqueString * const lhs_addr = &lhs;
    const DUniqueString * const rhs_addr = &rhs;

    return lhs_addr == rhs_addr;
}
/* Unique strings are interned, so inequality is non-identity:
 * two references differ exactly when they name different objects.
 */
inline bool operator!=(const DUniqueString & lhs, const DUniqueString & rhs) {
    const DUniqueString * const lhs_addr = &lhs;
    const DUniqueString * const rhs_addr = &rhs;

    return lhs_addr != rhs_addr;
}
/* Ordering goes through DUniqueString::compare(), so it follows
 * string contents rather than addresses.
 */
inline bool operator<=(const DUniqueString & lhs, const DUniqueString & rhs) {
    const int order = DUniqueString::compare(lhs, rhs);

    return order <= 0;
}
} /*namespace scm*/
} /*namespace xo*/
/* end DUniqueString.hpp */

View file

@ -5,7 +5,7 @@
#pragma once
#include <xo/object2/DString.hpp>
#include "DUniqueString.hpp"
#include <xo/arena/DArenaHashMap.hpp>
#include <xo/arena/DArena.hpp>
#include <xo/arena/hashmap/verify_policy.hpp>
@ -22,7 +22,7 @@ namespace xo {
public:
using DArena = xo::mm::DArena;
using StringMap = xo::map::DArenaHashMap<std::string_view,
DString*>;
DUniqueString*>;
using size_type = StringMap::size_type;
public:
@ -30,10 +30,10 @@ namespace xo {
bool debug_flag = false);
/** lookup interned string; nullptr if not present **/
const DString * lookup(std::string_view key) const;
const DUniqueString * lookup(std::string_view key) const;
/** return unique string with contents @p key. Idempotent! **/
const DString * intern(std::string_view key);
const DUniqueString * intern(std::string_view key);
/** verify StringTable invariants.
* Act on failure according to policy @p p

View file

@ -7,6 +7,7 @@ set(SELF_SRCS
IExpression_Any.cpp
IExpression_DConstant.cpp
StringTable.cpp
DUniqueString.cpp
expression2_register_facets.cpp
)

View file

@ -0,0 +1,95 @@
/** @file DUniqueString.cpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "DUniqueString.hpp"
#include <xo/arena/padding.hpp>
#include <xo/indentlog/scope.hpp>
namespace xo {
using xo::mm::padding;
using xo::facet::typeseq;
namespace scm {
const DString *
DUniqueString::_text() const noexcept
{
    /* The paired DString lives right after this object, at an address
     * chosen by the allocator (DArena, probably). In general allocator
     * alignment is more conservative than C++ alignment, so the
     * distance is the padded size of DUniqueString rather than its
     * raw sizeof.
     *
     * Remember also: although DUniqueString has zero data members,
     * C++ still gives it a size of at least 1 byte (distinct objects
     * must have distinct addresses), so the padded size is never zero.
     */
    const size_t text_offset = padding::with_padding(sizeof(DUniqueString));

    assert(text_offset > 0);

    const std::byte * self = reinterpret_cast<const std::byte *>(this);

    return reinterpret_cast<const DString *>(self + text_offset);
}
/** Create a DUniqueString whose contents are copied from @p sv,
 *  using memory from allocator @p mm.
 *
 *  The DUniqueString placeholder and its DString are carved out of one
 *  super-allocation: placeholder first, DString immediately after.
 *  _text() relies on this layout.
 *
 *  @param mm  allocator supplying the memory
 *  @param sv  string contents to copy
 *  @return    new instance, or nullptr if memory could not be obtained
 *             or the DString did not land where _text() expects it
 **/
DUniqueString *
DUniqueString::from_view(obj<AAllocator> mm,
                         std::string_view sv)
{
    scope log(XO_DEBUG(false));

    /** fine point: choosing to allocate DUniqueString ahead of DString,
     *  so it comes first in bump allocator
     **/
    void * mem = mm.super_alloc(typeseq::id<DUniqueString>(),
                                sizeof(DUniqueString));

    /* Placement-new into a null pointer is undefined behavior,
     * so give up before constructing anything.
     * NOTE(review): assumes a failed super_alloc() leaves no
     * super-allocation open that would need finishing -- confirm
     * against the allocator implementation.
     */
    if (mem == nullptr)
        return nullptr;

    DUniqueString * result = new (mem) DUniqueString();

    /** allocated in memory immediately following @p result.
     *  This optimization saves us one pointer (8 bytes) in DUniqueString
     *  itself, plus one allocation header (8 bytes) for 16 bytes total
     **/
    DString * text = DString::from_view_suballoc(mm, sv);

    log && log(xtag("result", result), xtag("result.text", result->_text()), xtag("text", text));

    assert(text);
    assert(text == result->_text());

    /** must finish super-allocation before next alloc **/
    mm.sub_alloc(0, true);

    /* With asserts compiled out (NDEBUG) the checks above disappear.
     * Never hand back an instance whose _text() would not refer to
     * a valid DString: report failure instead.
     */
    if ((text == nullptr) || (text != result->_text()))
        return nullptr;

    return result;
}
size_t
DUniqueString::shallow_size() const noexcept
{
return sizeof(DUniqueString);
}
/* Clone this unique string using memory from allocator @p mm.
 * Returns the copy, or nullptr if the allocator provided no memory.
 */
DUniqueString *
DUniqueString::shallow_copy(obj<AAllocator> mm) const noexcept
{
    // well-posed, but not expected to be used:
    // trap any call in builds with asserts enabled.
    assert(false);

    std::byte * self = reinterpret_cast<std::byte *>(const_cast<DUniqueString *>(this));
    DUniqueString * dup = (DUniqueString *)mm.alloc_copy(self);

    if (dup != nullptr) {
        *dup = *this;
    }

    return dup;
}
/* Nothing to forward: the collector argument is unused and the
 * function only reports shallow_size().
 */
size_t
DUniqueString::forward_children(obj<ACollector>) noexcept
{
    const size_t self_size = shallow_size();

    return self_size;
}
} /*namespace scm*/
} /*namespace xo*/
/* end DUniqueString.cpp */

View file

@ -23,7 +23,7 @@ namespace xo {
(void)debug_flag;
}
const DString *
const DUniqueString *
StringTable::lookup(std::string_view key) const
{
auto ix = map_.find(key);
@ -34,7 +34,7 @@ namespace xo {
return nullptr;
}
const DString *
const DUniqueString *
StringTable::intern(std::string_view key)
{
// 1a. lookup key in map_.
@ -49,9 +49,9 @@ namespace xo {
//
// 2d. return key2 address
// 2a. allocate DString copy 'interned' of key in strings_
// 2a. allocate DUniqueString copy 'interned' of key in strings_
auto mm = with_facet<AAllocator>::mkobj(&strings_);
DString * interned = DString::from_view(mm, key);
DUniqueString * interned = DUniqueString::from_view(mm, key);
assert(interned);
if (interned) {
@ -90,7 +90,7 @@ namespace xo {
/* ST2: for each entry, key points to value's string data */
for (const auto & kv : map_) {
const std::string_view & key = kv.first;
const DString * value = kv.second;
const DUniqueString * value = kv.second;
/* ST2.1: value is not null */
if (value == nullptr) {

View file

@ -9,7 +9,8 @@
namespace xo {
using xo::scm::StringTable;
using xo::scm::DString;
using xo::scm::DUniqueString;
//using xo::scm::DString;
namespace ut {
TEST_CASE("StringTable-lookup-empty", "[expression2][StringTable]")
@ -25,7 +26,7 @@ namespace xo {
{
StringTable table(1024);
const DString * s1 = table.intern("hello");
const DUniqueString * s1 = table.intern("hello");
REQUIRE(s1 != nullptr);
REQUIRE(std::strcmp(s1->chars(), "hello") == 0);
@ -36,8 +37,8 @@ namespace xo {
{
StringTable table(1024);
const DString * s1 = table.intern("hello");
const DString * s2 = table.intern("hello");
const DUniqueString * s1 = table.intern("hello");
const DUniqueString * s2 = table.intern("hello");
// same key returns same pointer
REQUIRE(s1 != nullptr);
@ -51,9 +52,9 @@ namespace xo {
REQUIRE(table.lookup("hello") == nullptr);
const DString * s1 = table.intern("hello");
const DUniqueString * s1 = table.intern("hello");
const DString * s2 = table.lookup("hello");
const DUniqueString * s2 = table.lookup("hello");
REQUIRE(s2 != nullptr);
REQUIRE(s1 == s2);
@ -63,9 +64,9 @@ namespace xo {
{
StringTable table(1024);
const DString * s1 = table.intern("apple");
const DString * s2 = table.intern("banana");
const DString * s3 = table.intern("cherry");
const DUniqueString * s1 = table.intern("apple");
const DUniqueString * s2 = table.intern("banana");
const DUniqueString * s3 = table.intern("cherry");
// all different pointers
REQUIRE(s1 != s2);
@ -88,14 +89,14 @@ namespace xo {
{
StringTable table(1024);
const DString * s1 = table.intern("");
const DUniqueString * s1 = table.intern("");
REQUIRE(s1 != nullptr);
REQUIRE(s1->size() == 0);
REQUIRE(s1->chars()[0] == '\0');
// idempotent for empty string too
const DString * s2 = table.intern("");
const DUniqueString * s2 = table.intern("");
REQUIRE(s1 == s2);
}