/** @file ParserStateMachine.cpp
 *
 *  @author Roland Conybeare, Jan 2026
 **/

#include "ParserStateMachine.hpp"
#include "ParserStack.hpp"
#include "SyntaxStateMachine.hpp"
#include "ToplevelSeqSsm.hpp"
#include "DefineSsm.hpp"
// NOTE(review): the eight directives below carry no header operand in the
// text under review. Restore the header names before building.
#include
#include
#include
#include
#include
#include
#include
#include

namespace xo {
    using xo::mm::MemorySizeInfo;
    using xo::print::APrintable;
    using xo::facet::FacetRegistry;
    using xo::facet::with_facet;

    namespace scm {
        /** Construct a parser state machine.
         *
         *  @param config  arena configuration; used to map @c parser_alloc_
         *                 and supplies @c debug_flag_
         *  @param symtab_config  configuration forwarded to DGlobalSymtab::make()
         *  @param max_stringtable_capacity  capacity handed to @c stringtable_
         *  @param expr_alloc  allocator stored in @c expr_alloc_; also forwarded
         *                     to the global symbol table
         *  @param aux_alloc   allocator stored in @c aux_alloc_; also forwarded
         *                     to the global symbol table
         **/
        ParserStateMachine::ParserStateMachine(const ArenaConfig & config,
                                               const ArenaHashMapConfig & symtab_config,
                                               size_type max_stringtable_capacity,
                                               obj expr_alloc,
                                               obj aux_alloc)
            : stringtable_{max_stringtable_capacity},
              parser_alloc_{DArena::map(config)},
              expr_alloc_{expr_alloc},
              aux_alloc_{aux_alloc},
              global_symtab_{DGlobalSymtab::make(expr_alloc, aux_alloc, symtab_config)},
              debug_flag_{config.debug_flag_}
        {
        }

        /** Report whether the parser is currently at top level.
         *
         *  Returns true when the parser stack is empty. Otherwise inspects
         *  the stack frame that has no parent (after optionally skipping one
         *  frame, see carve-out below) and returns the result of the cast
         *  applied to that frame's state machine.
         **/
        bool
        ParserStateMachine::is_at_toplevel() const noexcept
        {
            /* top-level always has DToplevelSeqSsm */
            ParserStack * s = stack_;

            if (s) {
                // NOTE(review): presumably a cast to the DefineSsm facet --
                // the target type is not visible in this text; confirm.
                auto def = obj::from(s->top());

                if (def) {
                    /* carve-out for top-level DefineSsm: report 'at top-level' when
                     * that top-level DefineSsm is on the stack, so we detect
                     * this condition inside DefineSsm's event handling
                     */
                    s = stack_->parent();
                }

                if (s && s->parent() == nullptr) {
                    // NOTE(review): presumably a cast to the toplevel-sequence
                    // facet -- target type not visible in this text; confirm.
                    auto top = obj::from(s->top());

                    return top;
                }
            } else {
                /** this isn't a normal operating state, still need a batch/interactive toplevel seq.
                 *  just the same seems better to call it top-level
                 **/
                return true;
            }

            return false;
        }

        /** Report whether an expression is partially parsed.
         *
         *  Returns false when the stack is empty. Otherwise returns true
         *  exactly when the cast applied to the top-of-stack state machine
         *  yields a false value.
         **/
        bool
        ParserStateMachine::has_incomplete_expr() const noexcept
        {
            scope log(XO_DEBUG(debug_flag_));

            // don't count toplevel expression
            ParserStack * s = stack_;

            if (s) {
                // NOTE(review): presumably a cast to the toplevel-sequence
                // facet -- target type not visible in this text; confirm.
                auto top = obj::from(s->top());

                return !top;
            } else {
                return false;
            }
        }

        /** Return the syntax state machine at the top of the parser stack.
         *
         *  Precondition: the parser stack is non-empty.
         **/
        obj
        ParserStateMachine::top_ssm() const
        {
            // stack_ is dereferenced unconditionally below
            assert(stack_);

            return this->stack_->top();
        }

        /** Report memory pools to @p visitor.
         *
         *  Visits @c stringtable_, @c parser_alloc_ and @c global_symtab_.
         **/
        void
        ParserStateMachine::visit_pools(const MemorySizeVisitor & visitor) const
        {
            stringtable_.visit_pools(visitor);
            parser_alloc_.visit_pools(visitor);
            global_symtab_->visit_pools(visitor);

            // not counting {expr_alloc_, aux_alloc_}. We don't consider
            // either to be owned by ParserStateMachine
        }

        /** Install @p ssm as the first frame of the parser stack.
         *
         *  Precondition: the parser stack is empty.
         *  After the push, records the arena checkpoint in
         *  @c parser_alloc_ckp_; clear_error_reset() restores to it.
         **/
        void
        ParserStateMachine::establish_toplevel_ssm(obj ssm)
        {
            scope log(XO_DEBUG(debug_flag_));

            assert(stack_ == nullptr);

            DArena::Checkpoint ckp = parser_alloc_.checkpoint();

            this->stack_ = ParserStack::push(nullptr /*stack*/,
                                             ckp,
                                             parser_alloc_,
                                             ssm);

            // checkpoint taken *after* the toplevel frame has been pushed
            this->parser_alloc_ckp_ = parser_alloc_.checkpoint();
        }

        /** Push @p ssm onto the parser stack, associating checkpoint @p ckp
         *  with the new frame.
         **/
        void
        ParserStateMachine::push_ssm(DArena::Checkpoint ckp,
                                     obj ssm)
        {
            scope log(XO_DEBUG(debug_flag_));

            // note: using parser_alloc_ for parser stack, since stacklike behavior
            this->stack_ = ParserStack::push(stack_, ckp, parser_alloc_, ssm);
        }

        /** Pop the top frame from the parser stack.
         *
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::pop_ssm()
        {
            scope log(XO_DEBUG(debug_flag_));

            assert(this->stack_);

            this->stack_ = ParserStack::pop(stack_, parser_alloc_);
        }

        /** Intern @p str in the string table; returns the table's result. **/
        const DUniqueString *
        ParserStateMachine::intern_string(std::string_view str)
        {
            return stringtable_.intern(str);
        }

        /** Forward to the string table's gensym() for @p str. **/
        const DUniqueString *
        ParserStateMachine::gensym(std::string_view str)
        {
            return stringtable_.gensym(str);
        }

        /** Resolve @p symbolname to a variable reference.
         *
         *  Searches the chain of local symbol tables starting at
         *  @c local_symtab_, then the global symbol table.
         *
         *  @return new DVarRef (allocated from @c expr_alloc_) on success;
         *          nullptr if the symbol is not in the string table or
         *          has no binding.
         **/
        DVarRef *
        ParserStateMachine::lookup_varref(std::string_view symbolname)
        {
            scope log(XO_DEBUG(debug_flag_));

            const DUniqueString * ustr = stringtable_.lookup(symbolname);

            if (!ustr) {
                // if we don't already know the symbol,
                // -> can't be a valid variable reference
                //    (whether global or local)
                return nullptr;
            }

            // TODO:
            // 1. check global symtab
            // 2. combine local+global symtab into independent struct
            // 3. move lookup_varref implementation there.
            //
            if (local_symtab_) {
                DLocalSymtab * symtab = local_symtab_;

                // count #of nested scopes to cross, to reach symbol
                //
                int32_t link_count = 0;

                while (symtab) {
                    Binding b = symtab->lookup_binding(ustr);

                    if (b.is_local()) {
                        assert(b.i_link() == 0);

                        DVariable * vardef = symtab->lookup_var(b);

                        assert(vardef);

                        /** ascii diagram here **/

                        return DVarRef::make(expr_alloc_, vardef, link_count);
                    } else {
                        assert(b.is_null());
                    }

                    ++link_count;
                    symtab = symtab->parent();
                }
            }

            DVariable * vardef = global_symtab_->lookup_variable(ustr);

            if (vardef) {
                return DVarRef::make(expr_alloc_,
                                     vardef,
                                     0 /*link_count -- n/a for globals*/);
            }

            // symbol not found
            return nullptr;
        }

        /** Make @p symtab the current local symbol table.
         *
         *  NOTE(review): the previous value of @c local_symtab_ is not linked
         *  here; presumably @p symtab already refers to it as parent --
         *  verify against callers.
         **/
        void
        ParserStateMachine::push_local_symtab(DLocalSymtab * symtab)
        {
            this->local_symtab_ = symtab;
        }

        /** Replace the current local symbol table with its parent.
         *
         *  Precondition: a local symbol table is established.
         **/
        void
        ParserStateMachine::pop_local_symtab()
        {
            assert(local_symtab_);

            this->local_symtab_ = local_symtab_->parent();
        }

        /** Upsert @p var into the global symbol table. **/
        void
        ParserStateMachine::upsert_var(DVariable * var)
        {
            assert(global_symtab_);

            global_symtab_->upsert_variable(this->expr_alloc(), var);
        }

        /** Replace @c result_ with a default-constructed ParserResult. **/
        void
        ParserStateMachine::reset_result()
        {
            this->result_ = ParserResult();
        }

        /** Recover after an error.
         *
         *  Resets @c result_, rewinds @c stack_ to the frame that has no
         *  parent, and restores @c parser_alloc_ to the checkpoint recorded
         *  by establish_toplevel_ssm().
         *
         *  NOTE(review): @c local_symtab_ is left untouched here. If an error
         *  can occur while a nested local scope is active, it may still refer
         *  to that scope afterwards -- confirm whether it should be cleared.
         **/
        void
        ParserStateMachine::clear_error_reset()
        {
            this->reset_result();

            // frames are not popped one by one; the arena restore below
            // rewinds parser_alloc_ in a single step
            while (stack_ && stack_->parent()) {
                stack_ = stack_->parent();
            }

            this->parser_alloc_.restore(parser_alloc_ckp_);
        }

        /** Deliver a parsed symbol to the top-of-stack state machine.
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::on_parsed_symbol(std::string_view sym)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("sym", sym));

            assert(stack_);

            this->stack_->top().on_parsed_symbol(sym, this);
        }

        /** Deliver a parsed type description to the top-of-stack state machine.
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::on_parsed_typedescr(TypeDescr td)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("td", td));

            assert(stack_);

            this->stack_->top().on_parsed_typedescr(td, this);
        }

        /** Deliver a parsed formal (name @p sym, type @p td) to the
         *  top-of-stack state machine.
         *  Preconditions: the parser stack is non-empty; @p sym is
         *  dereferenced for logging.
         **/
        void
        ParserStateMachine::on_parsed_formal(const DUniqueString * sym,
                                             TypeDescr td)
        {
            scope log(XO_DEBUG(debug_flag_),
                      xtag("sym", std::string_view(*sym)),
                      xtag("td", td));

            assert(stack_);

            this->stack_->top().on_parsed_formal(sym, td, this);
        }

        /** Deliver a parsed formal (name @p sym, type @p td) together with
         *  token @p tk to the top-of-stack state machine.
         *  Preconditions: the parser stack is non-empty; @p sym is
         *  dereferenced for logging.
         **/
        void
        ParserStateMachine::on_parsed_formal_with_token(const DUniqueString * sym,
                                                        TypeDescr td,
                                                        const Token & tk)
        {
            scope log(XO_DEBUG(debug_flag_),
                      xtag("sym", std::string_view(*sym)),
                      xtag("td", td),
                      xtag("tk", tk));

            assert(stack_);

            this->stack_->top().on_parsed_formal_with_token(sym, td, tk, this);
        }

        /** Deliver a parsed formal argument list to the top-of-stack
         *  state machine.
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::on_parsed_formal_arglist(DArray * arglist)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("arglist", obj(arglist)));

            assert(stack_);

            this->stack_->top().on_parsed_formal_arglist(arglist, this);
        }

        /** Deliver a parsed expression to the top-of-stack state machine.
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::on_parsed_expression(obj expr)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("expr", expr));

            assert(stack_);

            this->top_ssm().on_parsed_expression(expr, this);
        }

        /** Deliver a parsed expression together with token @p tk to the
         *  top-of-stack state machine.
         *  Precondition: the parser stack is non-empty.
         **/
        void
        ParserStateMachine::on_parsed_expression_with_token(obj expr,
                                                            const Token & tk)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("expr", expr), xtag("tk", tk));

            assert(stack_);

            this->top_ssm().on_parsed_expression_with_token(expr, tk, this);
        }

        /** Deliver input token @p tk to the top-of-stack state machine.
         *
         *  @throws std::runtime_error if the parser stack is empty.
         **/
        void
        ParserStateMachine::on_token(const Token & tk)
        {
            scope log(XO_DEBUG(debug_flag_), xtag("tk", tk));

            if (!stack_) {
                // parsing stack should always have toplevel expression sequence
                throw std::runtime_error(tostr("unexpected empty parsing stack",
                                               xtag("token", tk),
                                               xtag("help", "do it the same. but better!")
                                             ));
            }

            stack_->top().on_token(tk, this);
        }

        /** Record @p expr as the parser result, attributed to @p ssm_name.
         *  Overwrites any previous result.
         **/
        void
        ParserStateMachine::capture_result(std::string_view ssm_name,
                                           obj expr)
        {
            this->result_ = ParserResult::expression(ssm_name, expr);
        }

        /** Record an error result, attributed to @p ssm_name.
         *  Has no effect if @c result_ already holds an error.
         **/
        void
        ParserStateMachine::capture_error(std::string_view ssm_name,
                                          const DString * errmsg)
        {
            if (result_.is_error()) {
                /* in case one error triggers another, remember just the first one */
            } else {
                this->result_ = ParserResult::error(ssm_name, errmsg);
            }
        }

        /** Capture an "unexpected token" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param tk          offending token
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_input_on_token(std::string_view ssm_name,
                                                   const Token & tk,
                                                   std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            auto errmsg_string = tostr("Unexpected token for parsing state",
                                       xtag("token", tk),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_input_on_token"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected symbol" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param sym         offending symbol
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_input_on_symbol(std::string_view ssm_name,
                                                    std::string_view sym,
                                                    std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            auto errmsg_string = tostr("Unexpected symbol for parsing state",
                                       xtag("symbol", sym),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_input_on_symbol"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected type-description" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param td          offending type description
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_input_on_typedescr(std::string_view ssm_name,
                                                       TypeDescr td,
                                                       std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            auto errmsg_string = tostr("Unexpected type-description for parsing state",
                                       xtag("td", td),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_input_on_typedescr"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected formal" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param param_name  formal parameter name (dereferenced)
         *  @param param_type  formal parameter type
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_parsed_formal(std::string_view ssm_name,
                                                  const DUniqueString * param_name,
                                                  TypeDescr param_type,
                                                  std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            auto errmsg_string = tostr("Unexpected formal",
                                       xtag("param_name", std::string_view(*param_name)),
                                       xtag("param_type", param_type),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_parsed_formal"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected formal" error that also reports a token.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param param_name  formal parameter name (dereferenced)
         *  @param param_type  formal parameter type
         *  @param tk          token accompanying the formal
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_parsed_formal_with_token(std::string_view ssm_name,
                                                             const DUniqueString * param_name,
                                                             TypeDescr param_type,
                                                             const Token & tk,
                                                             std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            // "via" names this method, so the diagnostic can be told apart
            // from the one produced by illegal_parsed_formal()
            auto errmsg_string = tostr("Unexpected formal",
                                       xtag("param_name", std::string_view(*param_name)),
                                       xtag("param_type", param_type),
                                       xtag("tk", tk),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_parsed_formal_with_token"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected formal arglist" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param arglist     offending argument list
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_parsed_formal_arglist(std::string_view ssm_name,
                                                          DArray * arglist,
                                                          std::string_view expect_str)
        {
            obj arglist_pr(arglist);

            auto errmsg_string = tostr("Unexpected formal arglist",
                                       xtag("arglist", arglist_pr),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_parsed_formal_arglist"));

            assert(expr_alloc_);

            auto errmsg = DString::from_str(expr_alloc_, errmsg_string);

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected expression" error.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param expr        offending expression
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_parsed_expression(std::string_view ssm_name,
                                                      obj expr,
                                                      std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            auto expr_pr = expr.to_facet();
            //= FacetRegistry::instance().variant(expr);
            assert(expr_pr);

            /** TODO
             *  problem here: we have pretty() support for obj,
             *  but not "ordinary printing" support. So expression doesn't get printed
             **/

            auto errmsg_string = tostr("Unexpected expression",
                                       xtag("expr", expr_pr),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_parsed_expression"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture an "unexpected expression" error that also reports a token.
         *
         *  @param ssm_name    name of reporting syntax state machine
         *  @param expr        offending expression
         *  @param tk          token accompanying the expression
         *  @param expect_str  description of what was expected instead
         **/
        void
        ParserStateMachine::illegal_parsed_expression_with_token(std::string_view ssm_name,
                                                                 obj expr,
                                                                 const Token & tk,
                                                                 std::string_view expect_str)
        {
            // TODO:
            // - want to write error message using DArena
            // - need something like log_streambuf and/or tostr() that's arena-aware

            obj expr_pr = FacetRegistry::instance().variant(expr);
            assert(expr_pr);

            /** TODO
             *  problem here: we have pretty() support for obj,
             *  but not "ordinary printing" support. So expression doesn't get printed
             **/

            // "via" names this method, so the diagnostic can be told apart
            // from the one produced by illegal_parsed_expression()
            auto errmsg_string = tostr("Unexpected expression",
                                       xtag("expr", expr_pr),
                                       xtag("tk", tk),
                                       xtag("expecting", expect_str),
                                       xtag("ssm", ssm_name),
                                       xtag("via", "ParserStateMachine::illegal_parsed_expression_with_token"));

            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

        /** Capture a "no binding for symbol" error.
         *
         *  @param ssm_name  name of reporting syntax state machine
         *  @param sym       symbol that has no binding
         **/
        void
        ParserStateMachine::error_unbound_variable(std::string_view ssm_name,
                                                   std::string_view sym)
        {
            auto errmsg_string = tostr("No binding for symbol",
                                       xtag("symbol", sym),
                                       xtag("ssm", ssm_name));

            // same allocator precondition as the other error-capturing methods
            assert(expr_alloc_);

            auto errmsg = DString::from_view(expr_alloc_,
                                             std::string_view(errmsg_string));

            this->capture_error(ssm_name, errmsg);
        }

    } /*namespace scm*/
} /*namespace xo*/

/* end ParserStateMachine.cpp */