/** @file ParserStateMachine.cpp
 *
 *  @author Roland Conybeare, Jan 2026
 **/

#include "ParserStateMachine.hpp"
#include "ParserStack.hpp"
#include "SyntaxStateMachine.hpp"
#include "ToplevelSeqSsm.hpp"
#include "DefineSsm.hpp"
// NOTE(review): the twelve directives below carry no header name in this copy
// of the file -- TODO restore the original targets before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace xo {
using xo::mm::ACollector;
using xo::mm::AAllocator;
using xo::mm::AGCObject;
using xo::print::APrintable;
using xo::reflect::TypeDescr;
using xo::facet::FacetRegistry;
using xo::facet::with_facet;

namespace scm {
namespace {

/** Build the global environment and fill it with builtin primitives.
 *
 *  The environment is created from @p mm and paired with @p global_symtab.
 *  Every primitive offered by PrimitiveRegistry has its name interned in
 *  @p stringtable and is then upserted into the new environment.
 *
 *  @param stringtable      receives interned primitive names
 *  @param mm               allocator for the environment and its values
 *  @param global_symtab    symbol table coordinating with the environment
 *  @param pm_install_flags selects which primitives get installed
 *  @return the populated global environment
 **/
obj
global_env_setup(StringTable & stringtable,
                 obj mm,
                 DGlobalSymtab * global_symtab,
                 InstallFlags pm_install_flags)
{
    scope log(XO_DEBUG(false));

    DGlobalEnv * env = DGlobalEnv::_make(mm, global_symtab);

    // FUDGING this for now
    obj err_mm;
    DSimpleRcx rcx(mm, err_mm, &stringtable);

    // invoked once per primitive; always reports success
    InstallSink sink = ([env, rcx, &log] (std::string_view name,
                                          TypeDescr fn_td,
                                          obj pm,
                                          InstallFlags flags)
        {
            (void)flags;

            obj gc_pm = pm.to_facet();
            const DUniqueString * interned = rcx.stringtable()->intern(name);

            log && log("upsert", xtag("sym", std::string_view(*interned)));

            env->_upsert_value(rcx.allocator(), interned, fn_td, gc_pm);

            return true;
        });

    PrimitiveRegistry::instance()
        .install_primitives(with_facet::mkobj(&rcx),
                            sink,
                            pm_install_flags);

    return obj(env);
}

} /*namespace*/

/** Construct a parser state machine.
 *
 *  Sets up the string table, maps the parser arena from @p config, creates
 *  the global symbol table and global environment, then caches the bindings
 *  of the arithmetic and comparison primitives.
 *
 *  @param config                   arena configuration; also supplies debug_flag_
 *  @param symtab_var_config        hash-map config forwarded to DGlobalSymtab::make
 *  @param symtab_type_config       hash-map config forwarded to DGlobalSymtab::make
 *  @param max_stringtable_capacity capacity passed to the string table
 *  @param pm_install_flags         selects which primitives get installed
 *  @param expr_alloc               allocator stored in expr_alloc_
 *  @param aux_alloc                allocator stored in aux_alloc_
 **/
ParserStateMachine::ParserStateMachine(const ArenaConfig & config,
                                       const ArenaHashMapConfig & symtab_var_config,
                                       const ArenaHashMapConfig & symtab_type_config,
                                       size_type max_stringtable_capacity,
                                       InstallFlags pm_install_flags,
                                       obj expr_alloc,
                                       obj aux_alloc)
    : stringtable_{max_stringtable_capacity},
      parser_alloc_{DArena::map(config)},
      expr_alloc_{expr_alloc},
      aux_alloc_{aux_alloc},
      global_symtab_{DGlobalSymtab::make(expr_alloc,
                                         aux_alloc,
                                         symtab_var_config,
                                         symtab_type_config)},
      global_env_{global_env_setup(stringtable_,
                                   expr_alloc_,
                                   global_symtab_.data(),
                                   pm_install_flags)},
      debug_flag_{config.debug_flag_}
{
    // see xo-numeric/ {SetupNumeric.cpp, NumericPrimitives.cpp}
    // for setup of {_mul, _div, _sub, ...}
    //

    // Resolve the binding of one primitive. The name must already be in the
    // string table, since global_env_setup() interned it on install.
    const auto binding_for = [this](std::string_view pm_name) {
        const DUniqueString * name = stringtable_.lookup(pm_name);
        assert(name);
        return global_symtab_->lookup_binding(name);
    };

    this->multiply_binding_ = binding_for(NumericPrimitives::c_multiply_pm_name);
    this->divide_binding_   = binding_for(NumericPrimitives::c_divide_pm_name);
    this->add_binding_      = binding_for(NumericPrimitives::c_add_pm_name);
    this->subtract_binding_ = binding_for(NumericPrimitives::c_sub_pm_name);
    this->cmpeq_binding_    = binding_for(NumericPrimitives::c_cmpeq_pm_name);
    this->cmpne_binding_    = binding_for(NumericPrimitives::c_cmpne_pm_name);
    this->cmplt_binding_    = binding_for(NumericPrimitives::c_cmplt_pm_name);
    this->cmple_binding_    = binding_for(NumericPrimitives::c_cmple_pm_name);
    this->cmpgt_binding_    = binding_for(NumericPrimitives::c_cmpgt_pm_name);
    this->cmpge_binding_    = binding_for(NumericPrimitives::c_cmpge_pm_name);
}

ParserStateMachine::~ParserStateMachine() { obj gc = expr_alloc_.try_to_facet(); if (gc) { scope log(XO_DEBUG(true), "remove_gc_root not implemented"); gc.remove_gc_root(&global_symtab_); gc.remove_gc_root(&local_symtab_); gc.remove_gc_root(&global_env_); } } obj ParserStateMachine::multiply_pm() const { obj retval = global_env_->lookup_value(multiply_binding_); assert(retval); return retval; } obj ParserStateMachine::divide_pm() const { obj retval = global_env_->lookup_value(divide_binding_); assert(retval); return retval; } obj ParserStateMachine::add_pm() const { obj retval = global_env_->lookup_value(add_binding_); assert(retval); return retval; } obj ParserStateMachine::subtract_pm() const { obj retval = global_env_->lookup_value(subtract_binding_); assert(retval); return retval; } obj ParserStateMachine::cmpeq_pm() const { obj retval = global_env_->lookup_value(cmpeq_binding_); assert(retval); return retval; } obj ParserStateMachine::cmpne_pm() const { obj retval = global_env_->lookup_value(cmpne_binding_); assert(retval); return retval; } obj ParserStateMachine::cmplt_pm() const { obj retval = global_env_->lookup_value(cmplt_binding_); assert(retval); return retval; } obj ParserStateMachine::cmple_pm() const { obj retval = global_env_->lookup_value(cmple_binding_); assert(retval); return retval; } obj ParserStateMachine::cmpgt_pm() const { obj retval = global_env_->lookup_value(cmpgt_binding_); assert(retval); return retval; } obj ParserStateMachine::cmpge_pm() const { obj retval = global_env_->lookup_value(cmpge_binding_); assert(retval); return retval; } bool ParserStateMachine::is_at_toplevel() const noexcept { /* top-level always has DToplevelSeqSsm */ ParserStack * s = stack_; if (s) { auto def = obj::from(s->top()); if (def) { /* carve-out for top-level DefineSsm: report 'at top-level' when * that top-level DefineSsm is on the stack, so we detect * this condition inside DefineSsm's event handling */ s = stack_->parent(); } if (s && s->parent() == 
nullptr) { auto top = obj::from(s->top()); return top; } } else { /** this isn't a normal operating state, still need a batch/interactive toplevel seq. * just the same seems better to call it top-level **/ return true; } return false; } bool ParserStateMachine::has_incomplete_expr() const noexcept { scope log(XO_DEBUG(debug_flag_)); // don't count toplevel expression ParserStack * s = stack_; if (s) { auto top = obj::from(s->top()); return !top; } else { return false; } } obj ParserStateMachine::top_ssm() const { return this->stack_->top(); } void ParserStateMachine::visit_pools(const MemorySizeVisitor & visitor) const { stringtable_.visit_pools(visitor); parser_alloc_.visit_pools(visitor); global_symtab_->visit_pools(visitor); // not counting {expr_alloc_, fixed_alloc_}. We don't consider // either to be owned by ParserStateMachine } void ParserStateMachine::establish_toplevel_ssm(obj ssm) { scope log(XO_DEBUG(debug_flag_)); assert(stack_ == nullptr); DArena::Checkpoint ckp = parser_alloc_.checkpoint(); this->stack_ = ParserStack::push(nullptr /*stack*/, ckp, parser_alloc_, ssm); this->parser_alloc_ckp_ = parser_alloc_.checkpoint(); } void ParserStateMachine::push_ssm(DArena::Checkpoint ckp, obj ssm) { scope log(XO_DEBUG(debug_flag_)); // note: using parser_alloc_ for parser stack, since stacklike behavior this->stack_ = ParserStack::push(stack_, ckp, parser_alloc_, ssm); } void ParserStateMachine::pop_ssm() { scope log(XO_DEBUG(debug_flag_)); assert(this->stack_); this->stack_ = ParserStack::pop(stack_, parser_alloc_); } const DUniqueString * ParserStateMachine::intern_string(std::string_view str) { return stringtable_.intern(str); } const DUniqueString * ParserStateMachine::gensym(std::string_view str) { return stringtable_.gensym(str); } DVarRef * ParserStateMachine::lookup_varref(std::string_view symbolname) { scope log(XO_DEBUG(debug_flag_)); const DUniqueString * ustr = stringtable_.lookup(symbolname); if (!ustr) { // if we don't already know the symbol, // 
-> can't be a valid variable reference // (whether global or local) return nullptr; } // TODO: // 1. check global symtab // 2. combine local+global symtab into indept struct // 3. move lookup_varref implementation there. // if (local_symtab_) { DLocalSymtab * symtab = local_symtab_.data(); // count #of nested scopes to cross, to reach symbol // int32_t link_count = 0; while (symtab) { Binding b = symtab->lookup_binding(ustr); if (b.is_local()) { assert(b.i_link() == 0); DVariable * vardef = symtab->lookup_var(b); assert(vardef); /** ascii diagram here **/ return DVarRef::make(expr_alloc_, vardef, link_count); } else { assert(b.is_null()); } ++link_count; symtab = symtab->parent(); } } DVariable * vardef = global_symtab_->lookup_variable(ustr); if (vardef) { return DVarRef::make(expr_alloc_, vardef, 0 /*link_count -- n/a for globals*/); } // symbol not found return nullptr; } void ParserStateMachine::push_local_symtab(DLocalSymtab * symtab) { this->local_symtab_ = obj(symtab); } void ParserStateMachine::pop_local_symtab() { assert(local_symtab_); this->local_symtab_ = obj(local_symtab_->parent()); } void ParserStateMachine::upsert_var(DVariable * var) { assert(global_symtab_); global_symtab_->upsert_variable(this->expr_alloc(), var); } void ParserStateMachine::reset_result() { this->result_ = ParserResult(); } void ParserStateMachine::clear_error_reset() { this->reset_result(); while (stack_ && stack_->parent()) stack_ = stack_->parent(); this->parser_alloc_.restore(parser_alloc_ckp_); } void ParserStateMachine::on_parsed_symbol(std::string_view sym) { scope log(XO_DEBUG(debug_flag_), xtag("sym", sym)); assert(stack_); this->stack_->top().on_parsed_symbol(sym, this); } void ParserStateMachine::on_parsed_typedescr(TypeDescr td) { scope log(XO_DEBUG(debug_flag_), xtag("td", td)); assert(stack_); this->stack_->top().on_parsed_typedescr(td, this); } void ParserStateMachine::on_parsed_type(obj type) { scope log(XO_DEBUG(debug_flag_)); assert(stack_); 
this->stack_->top().on_parsed_type(type, this); } void ParserStateMachine::on_parsed_formal(const DUniqueString * sym, TypeDescr td) { scope log(XO_DEBUG(debug_flag_), xtag("sym", std::string_view(*sym)), xtag("td", td)); assert(stack_); this->stack_->top().on_parsed_formal(sym, td, this); } void ParserStateMachine::on_parsed_formal_with_token(const DUniqueString * sym, TypeDescr td, const Token & tk) { scope log(XO_DEBUG(debug_flag_), xtag("sym", std::string_view(*sym)), xtag("td", td), xtag("tk", tk)); assert(stack_); this->stack_->top().on_parsed_formal_with_token(sym, td, tk, this); } void ParserStateMachine::on_parsed_formal_arglist(DArray * arglist) { scope log(XO_DEBUG(debug_flag_), xtag("arglist", obj(arglist))); assert(stack_); this->stack_->top().on_parsed_formal_arglist(arglist, this); } void ParserStateMachine::on_parsed_expression(obj expr) { scope log(XO_DEBUG(debug_flag_), xtag("expr", expr)); assert(stack_); this->top_ssm().on_parsed_expression(expr, this); } void ParserStateMachine::on_parsed_expression_with_token(obj expr, const Token & tk) { scope log(XO_DEBUG(debug_flag_), xtag("expr", expr), xtag("tk", tk)); assert(stack_); this->top_ssm().on_parsed_expression_with_token(expr, tk, this); } void ParserStateMachine::on_quoted_literal(obj lit) { this->top_ssm().on_quoted_literal(lit, this); } void ParserStateMachine::on_token(const Token & tk) { scope log(XO_DEBUG(debug_flag_), xtag("tk", tk)); if (!stack_) { // parsing stack should always have toplevel expression sequence throw std::runtime_error(tostr("unexpected empty parsing stack", xtag("token", tk), xtag("help", "do it the same. 
but better!") )); } stack_->top().on_token(tk, this); } void ParserStateMachine::capture_result(std::string_view ssm_name, obj expr) { this->result_ = ParserResult::expression(ssm_name, expr); } void ParserStateMachine::capture_error(std::string_view ssm_name, const DString * errmsg) { if (result_.is_error()) { /* in case one error triggers another, remmber just the first one */ } else { this->result_ = ParserResult::error(ssm_name, errmsg); } } void ParserStateMachine::illegal_input_on_token(std::string_view ssm_name, const Token & tk, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected token for parsing state", xtag("token", tk), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_input_on_token")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_input_on_symbol(std::string_view ssm_name, std::string_view sym, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected symbol for parsing state", xtag("symbol", sym), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_input_on_symbol")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_input_on_typedescr(std::string_view ssm_name, TypeDescr td, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected type-description for parsing state", xtag("td", td), 
xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_input_on_typedescr")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_input_on_type(std::string_view ssm_name, obj type, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected type for parsing state", xtag("type", type), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_input_on_type")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_parsed_formal(std::string_view ssm_name, const DUniqueString * param_name, TypeDescr param_type, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected formal", xtag("param_name", std::string_view(*param_name)), xtag("param_type", param_type), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_formal")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_parsed_formal_with_token(std::string_view ssm_name, const DUniqueString * param_name, TypeDescr param_type, const Token & tk, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto errmsg_string = tostr("Unexpected formal", xtag("param_name", std::string_view(*param_name)), xtag("param_type", param_type), xtag("tk", tk), xtag("expecting", 
expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_formal")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_parsed_formal_arglist(std::string_view ssm_name, DArray * arglist, std::string_view expect_str) { obj arglist_pr(arglist); auto errmsg_string = tostr("Unexpected formal arglist", xtag("arglist", arglist_pr), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_formal_arglist")); assert(expr_alloc_); auto errmsg = DString::from_str(expr_alloc_, errmsg_string); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_parsed_expression(std::string_view ssm_name, obj expr, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware auto expr_pr = expr.to_facet(); //= FacetRegistry::instance().variant(expr); assert(expr_pr); /** TODO * problem here: we have pretty() support for obj, * but not "ordinary printing" support. So expression doesn't get printed **/ auto errmsg_string = tostr("Unexpected expression", xtag("expr", expr_pr), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_expression")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_parsed_expression_with_token(std::string_view ssm_name, obj expr, const Token & tk, std::string_view expect_str) { // TODO: // - want to write error message using DArena // - need something like log_streambuf and/or tostr() that's arena-aware obj expr_pr = FacetRegistry::instance().variant(expr); assert(expr_pr); /** TODO * problem here: we have pretty() support for obj, * but not "ordinary printing" support. 
So expression doesn't get printed **/ auto errmsg_string = tostr("Unexpected expression", xtag("expr", expr_pr), xtag("tk", tk), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_expression")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::illegal_quoted_literal(std::string_view ssm_name, obj lit, std::string_view expect_str) { obj lit_pr = FacetRegistry::instance().variant(lit); /** TODO * problem here: we have pretty() support for obj, * but not "ordinary printing" support. So expression doesn't get printed **/ auto errmsg_string = tostr("Unexpected quoted literal", xtag("lit", lit_pr), xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_quoted_literal")); assert(expr_alloc_); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } void ParserStateMachine::error_unbound_variable(std::string_view ssm_name, std::string_view sym) { auto errmsg_string = tostr("No binding for symbol", xtag("symbol", sym), xtag("ssm", ssm_name)); auto errmsg = DString::from_view(expr_alloc_, std::string_view(errmsg_string)); this->capture_error(ssm_name, errmsg); } // ----- gc support ----- #ifdef OBSOLETE void ParserStateMachine::shallow_copy(obj gc) noexcept { (void)gc; assert(false); } #endif void ParserStateMachine::visit_gco_children(VisitReason reason, obj gc) noexcept { //scope log(XO_DEBUG(true)); assert(!stringtable_.is_gc_eligible()); assert(!parser_alloc_.is_gc_eligible()); //log && log("forward stack_", xtag("addr", stack_)); if (stack_) { stack_->visit_gco_children(reason, gc); } // static_assert(!expr_alloc_.is_gc_eligible()); // static_assert(!aux_alloc_.is_gc_eligible()); //log && log("global_symtab_", xtag("addr", global_symtab_.data())); gc.visit_child(reason, &global_symtab_); //log && 
log("local_symtab_", xtag("addr", local_symtab_.data())); gc.visit_child(reason, &local_symtab_); //log && log("global_env_", xtag("addr", global_env_.data())); gc.visit_child(reason, &global_env_); //log && log("result_"); result_.visit_gco_children(reason, gc); } } /*namespace scm*/ } /*namespace xo*/ /* end ParserStateMachine.cpp */