xo-interpreter: handle literal strings. Broken memory model.
This commit is contained in:
parent
6e9349f70c
commit
cf846b2f8d
16 changed files with 234 additions and 32 deletions
|
|
@ -88,7 +88,7 @@ namespace xo {
|
|||
value_td_{Reflect::require<T>()},
|
||||
value_(x)
|
||||
{
|
||||
static_assert(std::is_standard_layout_v<T> && std::is_trivial_v<T>);
|
||||
//static_assert(std::is_standard_layout_v<T> && std::is_trivial_v<T>);
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -14,19 +14,6 @@ namespace xo {
|
|||
public:
|
||||
using ObjectConverter = xo::obj::ObjectConverter;
|
||||
|
||||
/** install conversions for PrimitiveExpr<Fn> -> Primitive<Fn>
|
||||
* for particular function pointer types Fn.
|
||||
*
|
||||
* Source type from xo-expression
|
||||
* Dest type from xo-object.
|
||||
*
|
||||
* Module dependence goes the other way
|
||||
* i.e. xo-interpreter -uses-> xo-expression
|
||||
* -uses-> xo-object
|
||||
* For this reason rejected adding a virtual method to PrimitiveExprInterface
|
||||
**/
|
||||
static void install_interpreter_conversions(ObjectConverter * target);
|
||||
|
||||
template <typename Expr>
|
||||
static void install_pm(gc::IAlloc * mm, rp<Expr> pm_expr, gp<GlobalEnv> env) {
|
||||
gp<Object> rhs
|
||||
|
|
|
|||
|
|
@ -23,12 +23,6 @@ namespace xo {
|
|||
return x + y;
|
||||
}
|
||||
|
||||
void
|
||||
BuiltinPrimitives::install_interpreter_conversions(ObjectConverter * /*target*/)
|
||||
{
|
||||
/* abandoning this path */
|
||||
}
|
||||
|
||||
void
|
||||
BuiltinPrimitives::install(gc::IAlloc * mm, gp<GlobalEnv> env)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -110,7 +110,6 @@ namespace xo {
|
|||
toplevel_env_{env},
|
||||
log_level_{ll}
|
||||
{
|
||||
BuiltinPrimitives::install_interpreter_conversions(&object_converter_);
|
||||
}
|
||||
|
||||
// ----- VirtualSchematikaMachine -----
|
||||
|
|
@ -321,7 +320,15 @@ namespace xo {
|
|||
|
||||
VSM_CONTINUE();
|
||||
} else {
|
||||
/* see ObjectConverter::ctor to add more builtin types */
|
||||
/* see ObjectConverter::ctor to add more builtin types
|
||||
*
|
||||
* generally conversion for a type Foo will appear in Foo.hpp
|
||||
* see
|
||||
* xo/object/Boolean.hpp
|
||||
* xo/object/Integer.hpp
|
||||
* xo/object/Float.hpp
|
||||
* xo/object/String.hpp
|
||||
*/
|
||||
|
||||
VSM_ERROR(tostr("constant_op: unable to convert native value to object",
|
||||
xtag("id", expr->value_tp().td()->id()),
|
||||
|
|
|
|||
|
|
@ -3,11 +3,41 @@
|
|||
* author: Roland Conybeare, Aug 2025
|
||||
*/
|
||||
|
||||
#include "xo/alloc/IAlloc.hpp"
#include "xo/alloc/Object.hpp"
#include "ObjectConversion.hpp"
#include <stdexcept>
#include <string>
|
||||
|
||||
namespace xo {
|
||||
namespace obj {
|
||||
/** unicode terminology (via https://utf8everywhere.org)
|
||||
* 1. code unit:
|
||||
* bit combination that represents a unit of encoded text.
|
||||
* 8-bits for utf-8 i.e. code-unit = char
|
||||
* 2. code point:
|
||||
* a numerical value in the unicode namespace, e.g. U+3243F
|
||||
* one or more code units encode a code point.
|
||||
* utf-8 uses 1-4 code units to encode each code point.
|
||||
* 3. abstract character:
|
||||
* inherently open, because includes characters that are not
|
||||
* (yet) representable in unicode.
|
||||
* 4. (en)coded character:
|
||||
* mapping between code points and abstract character.
|
||||
* for example U+1F428 is coded character for emoji named 'KOALA'
|
||||
* caveats:
|
||||
* - some code points do not have abstract characters assigned.
|
||||
* - some code points are reserved for non-characters
|
||||
* (e.g. null, newline ..)
|
||||
* - some abstract characters require multiple code points:
|
||||
* for example requiring a composition sequence
|
||||
* - some abstract characters have multiple encodings
|
||||
* 5. user-perceived character. whatever you think that means.
|
||||
* May be language-dependent.
|
||||
* 6. grapheme cluster. a sequence of coded characters that
|
||||
* "belong together". for example backspace would erase
|
||||
* a grapheme cluster atomically.
|
||||
* 7. glyph: a particular shape within a font. A sequence of code points maps to
|
||||
* a sequence of glyphs.
|
||||
**/
|
||||
class String : public Object {
|
||||
public:
|
||||
enum class owner { unique, shared };
|
||||
|
|
@ -32,6 +62,18 @@ namespace xo {
|
|||
const char * c_str() const { return chars_; }
|
||||
std::size_t length() const;
|
||||
|
||||
/** Approximate number of columns (if using a fixed-width font) occupied
|
||||
* by this string. Obtained by counting bytes up to null terminator,
|
||||
* omitting utf-8 continuation bytes, i.e. bytes with high bit set
|
||||
* and 2nd-highest bit clear.
|
||||
*
|
||||
* @text
|
||||
* bits: 76543210
|
||||
* 10______
|
||||
* @endtext
|
||||
**/
|
||||
std::size_t columns() const;
|
||||
|
||||
// inherited from Object..
|
||||
virtual TaggedPtr self_tp() const final override;
|
||||
virtual void display(std::ostream & os) const final override;
|
||||
|
|
@ -48,11 +90,36 @@ namespace xo {
|
|||
/** true iff storage in @ref chars_ is owned by this String.
|
||||
**/
|
||||
owner owner_ = owner::shared;
|
||||
/** length of @ref chars_ in bytes (storage allocated, not necessarily string length) **/
|
||||
/** length of @ref chars_ in bytes (storage allocated, not necessarily string length).
|
||||
* Includes null terminator
|
||||
**/
|
||||
std::size_t z_chars_ = 0;
|
||||
/** string contents. always null-terminated **/
|
||||
/** utf-8 string contents. always null-terminated.
|
||||
* Note that this is #of bytes
|
||||
**/
|
||||
char * chars_ = nullptr;
|
||||
};
|
||||
|
||||
struct ObjectConversion_String {
|
||||
static gp<Object> to_object(gc::IAlloc * mm, std::string x) {
|
||||
return String::copy(mm, x.c_str());
|
||||
}
|
||||
static std::string from_object(gc::IAlloc *, gp<Object> x) {
|
||||
gp<String> x_str = String::from(x);
|
||||
if (x_str.get()) {
|
||||
/* note: ignores allocator, always uses heap.
|
||||
* This will affect operation of primitives (if any) that
|
||||
* expect std::string. Alternative would be use IAlloc*,
|
||||
* with Blob wrapper (or without if/when need for iterable
|
||||
* memory is dropped).
|
||||
*/
|
||||
return std::string(x_str->c_str());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ObjectConversion<std::string> : public ObjectConversion_String {};
|
||||
} /*namespace obj*/
|
||||
} /*namespace xo*/
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "Integer.hpp"
|
||||
#include "Float.hpp"
|
||||
#include "Boolean.hpp"
|
||||
#include "String.hpp"
|
||||
#include "TaggedPtr.hpp"
|
||||
#include "xo/alloc/Blob.hpp"
|
||||
|
||||
|
|
@ -121,6 +122,30 @@ namespace xo {
|
|||
|
||||
return Reflect::make_tp(bool_obj->value() ? &s_true : &s_false);
|
||||
}
|
||||
|
||||
/** Build a String object from the native std::string that @p src refers to.
 *
 *  @param mm   allocator handed to String::copy
 *  @param src  tagged pointer; must recover as std::string (asserted)
 *  @return result of String::copy on the native string's c_str()
 **/
gp<Object>
string_to_object(IAlloc * mm, const TaggedPtr & src)
{
    const std::string * str_p = src.recover_native<std::string>();

    /* a failed recovery is treated as a programming error */
    assert(str_p != nullptr);

    const char * text = str_p->c_str();

    return String::copy(mm, text);
}
|
||||
|
||||
/** Convert String object @p obj to a tagged native std::string.
 *
 *  Not implemented yet: after validating that @p obj is a String this
 *  always throws, since there is no place to keep the native value.
 *
 *  @param obj  object expected to be a String
 *  @throws std::runtime_error if @p obj is not a String, and also
 *          (for now) when it is one
 **/
TaggedPtr
object_to_string(IAlloc * /*mm*/, gp<Object> obj)
{
    gp<String> string_obj = String::from(obj);

    if (!string_obj.get()) {
        throw std::runtime_error(tostr("Object obj found where String expected",
                                       xtag("obj", obj)));
    }

    // still don't have good solver for this yet.
    //
    // Throw rather than assert(false): with NDEBUG the assert vanishes and
    // control would flow off the end of a function returning TaggedPtr
    // (undefined behavior).
    throw std::runtime_error
        ("object_to_string: String -> std::string conversion not implemented");
}
|
||||
}
|
||||
|
||||
ObjectConverter::ObjectConverter()
|
||||
|
|
@ -131,6 +156,8 @@ namespace xo {
|
|||
this->establish_conversion<double>(&float_to_object<double>, &object_to_float<double>);
|
||||
|
||||
this->establish_conversion<bool>(&bool_to_object, &object_to_bool);
|
||||
|
||||
this->establish_conversion<std::string>(&string_to_object, &object_to_string);
|
||||
}
|
||||
|
||||
gp<Object>
|
||||
|
|
|
|||
|
|
@ -99,6 +99,22 @@ namespace xo {
|
|||
return ::strlen(chars_);
|
||||
}
|
||||
|
||||
std::size_t
|
||||
String::columns() const
|
||||
{
|
||||
size_t retval = 0;
|
||||
|
||||
for (const char * p = chars_, * e = chars_ + z_chars_; *p && (p < e); ++p) {
|
||||
if ((*p & 0xc0) == 0x80) {
|
||||
/* continuation byte -> ignore */
|
||||
} else {
|
||||
++retval;
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
TaggedPtr
|
||||
String::self_tp() const {
|
||||
return Reflect::make_tp(const_cast<String*>(this));
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ namespace xo {
|
|||
std::vector<Testcase_String>
|
||||
s_testcase_v = {
|
||||
Testcase_String(1024, 4096, 512, 512, {"hello"}),
|
||||
// in emacs: C-x 8 RET lambda
|
||||
//
|
||||
Testcase_String(1024, 4096, 512, 512, {"λ"}),
|
||||
Testcase_String(1024, 4096, 512, 512, {"hello", ", world!"})
|
||||
};
|
||||
}
|
||||
|
|
@ -168,6 +171,31 @@ namespace xo {
|
|||
}
|
||||
}
|
||||
|
||||
/* String::columns() vs String::length() on empty, ascii and
 * multi-byte utf-8 input.
 */
TEST_CASE("String.columns", "[String][unicode]")
{
    const bool c_debug_flag = false;

    /* 16k arena, installed as Object::mm for the allocations below */
    up<ArenaAlloc> arena
        = ArenaAlloc::make("testarena", 16*1024, c_debug_flag);
    Object::mm = arena.get();

    /* empty string: no bytes, no columns */
    gp<String> empty_str = String::copy("");
    REQUIRE(empty_str->columns() == 0);
    REQUIRE(empty_str->length() == 0);

    /* single ascii character: one byte, one column */
    gp<String> ascii_str = String::copy("l");
    REQUIRE(ascii_str->columns() == 1);
    REQUIRE(ascii_str->length() == 1);

    /* greek lambda: one column, but two code units in the code point */
    gp<String> lambda_str = String::copy("λ");
    REQUIRE(lambda_str->columns() == 1);
    REQUIRE(lambda_str->length() == 2);
}
|
||||
|
||||
TEST_CASE("String.append", "[String]")
|
||||
{
|
||||
const bool c_debug_flag = false;
|
||||
|
|
|
|||
|
|
@ -338,7 +338,7 @@ namespace xo {
|
|||
ContentsType contents_;
|
||||
/* accumulator for some binary function of Values.
|
||||
* must be associative, since value will be produced
|
||||
* by any testing of calls to Reduce::combine().
|
||||
* by any ordering of calls to Reduce::combine().
|
||||
*
|
||||
* e.g. {a, b, c, d} could be reduced by:
|
||||
* r(r(a,b), r(c,d))
|
||||
|
|
|
|||
|
|
@ -68,6 +68,9 @@ namespace xo {
|
|||
virtual void on_f64_token(const token_type & tk,
|
||||
parserstatemachine * p_psm) override;
|
||||
|
||||
virtual void on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm) final override;
|
||||
|
||||
/** update exprstate in response to a successfully-parsed subexpression **/
|
||||
virtual void on_expr(bp<Expression> expr,
|
||||
parserstatemachine * p_psm) override;
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ namespace xo {
|
|||
parserstatemachine * p_psm) override;
|
||||
virtual void on_f64_token(const token_type & tk,
|
||||
parserstatemachine * p_psm) override;
|
||||
virtual void on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm) final override;
|
||||
|
||||
// ----- victory methods -----
|
||||
|
||||
|
|
|
|||
|
|
@ -218,6 +218,10 @@ namespace xo {
|
|||
virtual void on_f64_token(const token_type & tk,
|
||||
parserstatemachine * p_psm);
|
||||
|
||||
/** handle incoming string-literal token **/
|
||||
virtual void on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm);
|
||||
|
||||
protected:
|
||||
/** throw exception when next token is inconsistent with
|
||||
* parsing state
|
||||
|
|
|
|||
|
|
@ -173,7 +173,7 @@ namespace xo {
|
|||
*/
|
||||
progress_xs::start(var.promote(), p_psm);
|
||||
|
||||
#ifdef NOT_YET
|
||||
#ifdef NOT_YET
|
||||
p_stack->push_exprstate(exprstate(exprstatetype::expr_progress,
|
||||
Variable::make(name, type)));
|
||||
#endif
|
||||
|
|
@ -227,6 +227,21 @@ namespace xo {
|
|||
p_psm);
|
||||
}
|
||||
|
||||
void
|
||||
expect_expr_xs::on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm)
|
||||
{
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
/* e.g.
|
||||
* def msg = "hello, world";
|
||||
* \----tk----/
|
||||
*/
|
||||
progress_xs::start
|
||||
(Constant<std::string>::make(tk.text()),
|
||||
p_psm);
|
||||
}
|
||||
|
||||
void
|
||||
expect_expr_xs::on_expr(bp<Expression> expr,
|
||||
parserstatemachine * p_psm)
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ namespace xo {
|
|||
|
||||
if (tk.text() == "bool")
|
||||
td = Reflect::require<bool>();
|
||||
else if (tk.text() == "str")
|
||||
td = Reflect::require<std::string>();
|
||||
else if (tk.text() == "f64")
|
||||
td = Reflect::require<double>();
|
||||
else if(tk.text() == "f32")
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ namespace xo {
|
|||
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprseq_xs::on_i64_token";
|
||||
constexpr const char * c_self_name = "exprseq_xs::on_f64_token";
|
||||
|
||||
if (xseqtype_ == exprseqtype::toplevel_interactive)
|
||||
{
|
||||
|
|
@ -199,6 +199,44 @@ namespace xo {
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
exprseq_xs::on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm)
|
||||
{
|
||||
using xo::scm::Constant;
|
||||
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprseq_xs::on_string_token";
|
||||
|
||||
if (xseqtype_ == exprseqtype::toplevel_interactive)
|
||||
{
|
||||
// remark:
|
||||
// 1. Constant is an expression. At present (nov 2025) these are
|
||||
// reference-counted (+ leak in xo-interpreter).
|
||||
// 2. Could fix leak by adding a finalization feature to GC.
|
||||
// Do intend to eventually support finalization,
|
||||
// but not to use it here.
|
||||
// 3. Instead mean to change allocation strategy for Expression
|
||||
// to use GC instead.
|
||||
// 4. As intermediate step try migrating Expression hierarchy
|
||||
// to support arena allocation.
|
||||
// See xo/alloc/ArenaAllocT.hpp + assoc'd unit test
|
||||
//
|
||||
progress_xs::start(Constant<std::string>::make(tk.text()), p_psm);
|
||||
} else {
|
||||
/* policy: don't allow literals as toplevel expressions
|
||||
* unless interactive session.
|
||||
*/
|
||||
const char * exp = get_expect_str();
|
||||
|
||||
this->illegal_input_on_token(c_self_name,
|
||||
tk,
|
||||
exp,
|
||||
p_psm);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
exprseq_xs::on_typedescr(TypeDescr /*td*/,
|
||||
parserstatemachine * /*p_psm*/)
|
||||
|
|
|
|||
|
|
@ -336,7 +336,7 @@ namespace xo {
|
|||
{
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprstate::on_bool";
|
||||
constexpr const char * c_self_name = "exprstate::on_bool_token";
|
||||
const char * exp = get_expect_str();
|
||||
|
||||
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
|
||||
|
|
@ -348,7 +348,7 @@ namespace xo {
|
|||
{
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprstate::on_i64";
|
||||
constexpr const char * c_self_name = "exprstate::on_i64_token";
|
||||
const char * exp = get_expect_str();
|
||||
|
||||
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
|
||||
|
|
@ -360,7 +360,19 @@ namespace xo {
|
|||
{
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprstate::on_f64";
|
||||
constexpr const char * c_self_name = "exprstate::on_f64_token";
|
||||
const char * exp = get_expect_str();
|
||||
|
||||
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
|
||||
}
|
||||
|
||||
void
|
||||
exprstate::on_string_token(const token_type & tk,
|
||||
parserstatemachine * p_psm)
|
||||
{
|
||||
scope log(XO_DEBUG(p_psm->debug_flag()));
|
||||
|
||||
constexpr const char * c_self_name = "exprstate::on_string_token";
|
||||
const char * exp = get_expect_str();
|
||||
|
||||
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
|
||||
|
|
@ -399,7 +411,7 @@ namespace xo {
|
|||
return;
|
||||
|
||||
case tokentype::tk_string:
|
||||
assert(false);
|
||||
this->on_string_token(tk, p_psm);
|
||||
return;
|
||||
|
||||
case tokentype::tk_symbol:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue