/* @file String.hpp * * author: Roland Conybeare, Aug 2025 */ #include "xo/alloc/Object.hpp" #include "ObjectConversion.hpp" #include "xo/alloc/IAlloc.hpp" namespace xo { namespace obj { /** unicode terminology (via https://utf8everywhere.org) * 1. code unit: * bit combination that represents a unit of encoded text. * 8-bits for utf-8 i.e. code-unit = char * 2. code point: * a numerical value in the unicode namespace, e.g. U+3243F * one or more code units encode a code point. * utf-8 uses 1-4 code units to encode each code point. * 3. abstract character: * inherently open, because includes characters that are not * (yet) representable in unicode. * 4. (en)coded character: * mapping between code points and abstract character. * for example U+1F428 is coded character for emoji named 'KOALA' * caveats: * - some code points do not have abstract characters assigned. * - some code points are reserved for non-characters * (e.g. null, newline ..) * - some abstract characters require multiple code points: * for example requiring a composition sequence * - some abstract characters have multiple encodings * 5. user-perceived character. whatever you think that means. * May be language-dependent. * 6. grapheme cluster. a sequence of coded characters that * "belong together". for example backspace would erase * a grapheme cluster atomically. * 7. a shape within a font. A sequence of code points maps to * a sequence of glyphs. **/ class String : public Object { public: enum class owner { unique, shared }; /** donwcast from @p x iff x is actually a String. Otherwise nullptr **/ static gp from(gp x); /** create shared string @p s, using allocator @ref Object::mm **/ static gp share(const char * s); /** create shared string @p s, using allocator @p mm **/ static gp share(gc::IAlloc * mm, const char * s); /** create copy of string @p s, using allocator @ref Object::mm **/ static gp copy(const char * s); /** create copy of string @p s, using allocator @p mm **/ static gp copy(gc::IAlloc * mm, const char * s); /** create empty string with @p z bytes of string space **/ static gp allocate(std::size_t z); /** create string containing contents of @p s1 follwed by contents of @p s2 **/ static gp append(gp s1, gp s2); const char * c_str() const { return chars_; } std::size_t length() const; /** Approximate number of columns (if using a fixed-width font) occupied * by this string. Obtained by counting bytes up to null terminator, * omitting utf-8 continuation bytes, i.e. bytes with high bit set * and 2nd-highest bit clear. * * @text * bits: 76543210 * 10______ * @endtext **/ std::size_t columns() const; // inherited from Object.. virtual TaggedPtr self_tp() const final override; virtual void display(std::ostream & os) const final override; virtual std::size_t _shallow_size() const final override; virtual Object * _shallow_copy(gc::IAlloc * gc) const final override; virtual std::size_t _forward_children(gc::IAlloc * gc) final override; private: String(owner owner, std::size_t z, char * s); /** create instance, copying string contents (when @p copy_flag is true) using allocator @p mm **/ String(gc::IAlloc * mm, owner owner, std::size_t z, char * s); private: /** true iff storage in @ref chars_ is owned by this String. **/ owner owner_ = owner::shared; /** length of @ref chars_ in bytes (storage allocated, not necessarily string length). * Includes null terminator **/ std::size_t z_chars_ = 0; /** utf-8 string contents. always null-terminated. * Note that this is #of bytes **/ char * chars_ = nullptr; }; struct ObjectConversion_String { static gp to_object(gc::IAlloc * mm, std::string x) { return String::copy(mm, x.c_str()); } static std::string from_object(gc::IAlloc *, gp x) { gp x_str = String::from(x); if (x_str.get()) { /* note: ignores allocator, always uses heap. * This will affect operation of primitives (if any) that * expect std::string. Alternative would be use IAlloc*, * with Blob wrapper (or without if/when need for iterable * memory is dropped). */ return std::string(x_str->c_str()); } } }; template <> struct ObjectConversion : public ObjectConversion_String {}; } /*namespace obj*/ } /*namespace xo*/ /* end String.hpp */