From f5ccd99dd2a6ae71219d78d13944343b3056726b Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Fri, 30 Jan 2026 10:26:35 -0500 Subject: [PATCH] xo-expression2 xo-reader2: local symtab stack in PSM --- .../xo/reader2/DExpectFormalArglistSsm.hpp | 8 ++- include/xo/reader2/DLambdaSsm.hpp | 4 +- include/xo/reader2/ParserStateMachine.hpp | 31 ++++++++++- src/reader2/DExpectFormalArglistSsm.cpp | 25 +++++---- src/reader2/DLambdaSsm.cpp | 52 +++++++++++++++++++ src/reader2/ParserStack.cpp | 2 +- src/reader2/ParserStateMachine.cpp | 20 +++++++ utest/SchematikaParser.test.cpp | 6 +-- 8 files changed, 127 insertions(+), 21 deletions(-) diff --git a/include/xo/reader2/DExpectFormalArglistSsm.hpp b/include/xo/reader2/DExpectFormalArglistSsm.hpp index 289b7cf5..81d7e85d 100644 --- a/include/xo/reader2/DExpectFormalArglistSsm.hpp +++ b/include/xo/reader2/DExpectFormalArglistSsm.hpp @@ -71,14 +71,18 @@ namespace xo { /** @defgroup scm-expectformalarglistssm-methods general methods **/ ///@{ - /** update state on incoming token @p tk, with overall parser state in @p psm **/ + /** update state on incoming token @p tk, with overall parser state in @p p_psm **/ void on_leftparen_token(const Token & tk, ParserStateMachine * p_psm); - /** update state on incoming token @p tk, with overall parser state in @p psm **/ + /** update state on incoming token @p tk, with overall parser state in @p p_psm **/ void on_comma_token(const Token & tk, ParserStateMachine * p_psm); + /** update state on incoming rightparen token @p tk, with overall parser state in @p p_psm **/ + void on_rightparen_token(const Token & tk, + ParserStateMachine * p_psm); + ///@} /** @defgroup scm-expectformalarglistssm-ssm-facet syntaxstatemachine facet methods **/ ///@{ diff --git a/include/xo/reader2/DLambdaSsm.hpp b/include/xo/reader2/DLambdaSsm.hpp index 8800ad1c..22185269 100644 --- a/include/xo/reader2/DLambdaSsm.hpp +++ b/include/xo/reader2/DLambdaSsm.hpp @@ -144,8 +144,6 @@ namespace xo { #ifdef NOT_YET 
virtual const char * get_expect_str() const override; - virtual void on_formal_arglist(const std::vector> & argl, - parserstatemachine * p_psm) override; virtual void on_leftbrace_token(const token_type & tk, parserstatemachine * p_psm) override; virtual void on_colon_token(const token_type & tk, @@ -173,10 +171,10 @@ namespace xo { /** parsing state-machine state **/ lambdastatetype lmstate_ = lambdastatetype::lm_0; -#ifdef NOT_YET /** lambda environment (for formal parameters) **/ DLocalSymtab * local_symtab_ = nullptr; +#ifdef NOT_YET /** explicit return type (if supplied) **/ TypeDescr explicit_return_td_ = nullptr; diff --git a/include/xo/reader2/ParserStateMachine.hpp b/include/xo/reader2/ParserStateMachine.hpp index 1c5729a6..fbf636e3 100644 --- a/include/xo/reader2/ParserStateMachine.hpp +++ b/include/xo/reader2/ParserStateMachine.hpp @@ -6,6 +6,8 @@ #pragma once #include "ParserResult.hpp" +#include +#include #include #include #include @@ -47,6 +49,7 @@ namespace xo { bool debug_flag() const noexcept { return debug_flag_; } ParserStack * stack() const noexcept { return stack_; } obj expr_alloc() const noexcept { return expr_alloc_; } + DLocalSymtab * local_symtab() const noexcept { return local_symtab_; } const ParserResult & result() const noexcept { return result_; } /** true iff state machine is currently idle (at top-level) **/ @@ -79,6 +82,15 @@ namespace xo { **/ const DUniqueString * intern_string(std::string_view str); + /** push nested local symtab while parsing the body of a lambda expression; + * restore previous symtab at the end of lambda-expression definition. + * See @ref pop_local_symtab + **/ + void push_local_symtab(DLocalSymtab * symtab); + + /** pop nested symbol table from symbol-table stack **/ + void pop_local_symtab(); + /** add variable to current local environment (innermost lexical scope) **/ void upsert_var(DVariable * var); @@ -219,8 +231,10 @@ namespace xo { * after encountering a parsing error. 
**/ DArena::Checkpoint parser_alloc_ckp_; - - /** parser stack. Memory from @ref parser_alloc_ **/ + /** parser stack. Memory always from @ref parser_alloc_; + * elements that should survive parsing allocate from + * @ref expr_alloc_, see below. + **/ ParserStack * stack_ = nullptr; /** Allocator for parsed expressions. @@ -239,6 +253,19 @@ namespace xo { **/ obj expr_alloc_; + /** symbol table with local bindings. + * non-null during parsing of lambda expressions. + * Always allocated from @ref expr_alloc_. + * Push local symbol table here to remember local params + * during the body of a lambda expression. + **/ + DLocalSymtab * local_symtab_ = nullptr; + + /** global symbol table. + * Toplevel definitions go here. + **/ + DGlobalSymtab * global_symtab_ = nullptr; + /** current output from parser **/ ParserResult result_; diff --git a/src/reader2/DExpectFormalArglistSsm.cpp b/src/reader2/DExpectFormalArglistSsm.cpp index 63113f4e..bebe4108 100644 --- a/src/reader2/DExpectFormalArglistSsm.cpp +++ b/src/reader2/DExpectFormalArglistSsm.cpp @@ -126,6 +126,10 @@ namespace xo { this->on_comma_token(tk, p_psm); return; + case tokentype::tk_rightparen: + this->on_rightparen_token(tk, p_psm); + return; + // all the not-yet-handled cases case tokentype::tk_lambda: case tokentype::tk_def: @@ -139,7 +143,6 @@ namespace xo { case tokentype::tk_bool: case tokentype::tk_semicolon: case tokentype::tk_invalid: - case tokentype::tk_rightparen: case tokentype::tk_leftbracket: case tokentype::tk_rightbracket: case tokentype::tk_leftbrace: @@ -300,20 +303,22 @@ namespace xo { this->get_expect_str()); } -#ifdef NOT_YET void - expect_formal_arglist_xs::on_rightparen_token(const token_type & tk, - parserstatemachine * p_psm) + DExpectFormalArglistSsm::on_rightparen_token(const Token & tk, + ParserStateMachine * p_psm) { - if (farglxs_type_ == formalarglstatetype::argl_1b) { - std::unique_ptr self = p_psm->pop_exprstate(); + if (fastate_ == formalarglstatetype::argl_1b) { + DArray * args = 
argl_; - p_psm->top_exprstate().on_formal_arglist(this->argl_, p_psm); - } else { - exprstate::on_rightparen_token(tk, p_psm); + p_psm->pop_ssm(); + p_psm->on_parsed_formal_arglist(args); + return; } + + p_psm->illegal_input_on_token("DExpectFormalArglistSsm::on_rightparen_token", + tk, + this->get_expect_str()); } -#endif bool DExpectFormalArglistSsm::pretty(const ppindentinfo & ppii) const diff --git a/src/reader2/DLambdaSsm.cpp b/src/reader2/DLambdaSsm.cpp index 99e01ffd..4bcbb844 100644 --- a/src/reader2/DLambdaSsm.cpp +++ b/src/reader2/DLambdaSsm.cpp @@ -9,6 +9,9 @@ #include "ssm/ISyntaxStateMachine_DExpectFormalArglistSsm.hpp" #include "ParserStateMachine.hpp" #include "syntaxstatetype.hpp" +#include +#include +//#include #include #include #include @@ -28,6 +31,7 @@ namespace xo { using xo::print::APrintable; using xo::mm::AAllocator; + using xo::mm::AGCObject; using xo::facet::FacetRegistry; using xo::reflect::typeseq; @@ -353,6 +357,54 @@ namespace xo { DLambdaSsm::on_parsed_formal_arglist(DArray * arglist, ParserStateMachine * p_psm) { + if (lmstate_ == lambdastatetype::lm_1) { + this->lmstate_ = lambdastatetype::lm_2; + /// something with top env frame ? + + /// TODO: arena-friendly non-gc-aware vector; + // use instead of DArray for arglist. + // something like DTypedArray + + /// create LocalSymtab from arglist + + DLocalSymtab * symtab + = DLocalSymtab::_make_empty(p_psm->expr_alloc(), + p_psm->local_symtab(), + arglist->size()); + assert(symtab); + + for (DArray::size_type i = 0, n = arglist->size(); i < n; ++i) { + obj param = arglist->at(i); + + // sad! 
runtime poly conversion from obj + // We only need this because of (suboptimally) using DArray to store arglist + + obj param_expr + = FacetRegistry::instance().variant(param); + obj param_var + = obj::from(param_expr); + + assert(param_expr.data()); + assert(param_var.data()); + + Binding b = symtab->append_var(p_psm->expr_alloc(), + param_var.data()->name(), + param_var.data()->typeref()); + + assert(b.is_local()); + + this->local_symtab_ = symtab; + } + + // stash env frame: records local variables while we handle lambda body + + p_psm->push_local_symtab(symtab); + + // control reenters via .on_colon_token() / .on_leftbrace_token() + + return; + } + p_psm->illegal_parsed_formal_arglist("DLambdaSsm::on_parsed_formal_arglist", arglist, this->get_expect_str()); diff --git a/src/reader2/ParserStack.cpp b/src/reader2/ParserStack.cpp index 9af9f339..e3ffb290 100644 --- a/src/reader2/ParserStack.cpp +++ b/src/reader2/ParserStack.cpp @@ -70,7 +70,7 @@ namespace xo { std::size_t i_frame = 0; while (frame) { - char buf[80]; + char buf[32]; snprintf(buf, sizeof(buf), "[%lu]", i_frame); auto ssm = (FacetRegistry::instance().variant diff --git a/src/reader2/ParserStateMachine.cpp b/src/reader2/ParserStateMachine.cpp index 0316515e..085cce56 100644 --- a/src/reader2/ParserStateMachine.cpp +++ b/src/reader2/ParserStateMachine.cpp @@ -88,6 +88,20 @@ namespace xo { return stringtable_.intern(str); } + void + ParserStateMachine::push_local_symtab(DLocalSymtab * symtab) + { + this->local_symtab_ = symtab; + } + + void + ParserStateMachine::pop_local_symtab() + { + assert(local_symtab_); + + this->local_symtab_ = local_symtab_->parent(); + } + void ParserStateMachine::upsert_var(DVariable * var) { @@ -310,6 +324,12 @@ namespace xo { xtag("expecting", expect_str), xtag("ssm", ssm_name), xtag("via", "ParserStateMachine::illegal_parsed_formal_arglist")); + + assert(expr_alloc_); + + auto errmsg = DString::from_str(expr_alloc_, errmsg_string); + + this->capture_error(ssm_name, errmsg); } 
void diff --git a/utest/SchematikaParser.test.cpp b/utest/SchematikaParser.test.cpp index cedb4c1d..cc1e51fa 100644 --- a/utest/SchematikaParser.test.cpp +++ b/utest/SchematikaParser.test.cpp @@ -327,11 +327,10 @@ namespace xo { REQUIRE(result.is_incomplete()); } -#ifdef NOT_YET { - auto & result = parser.on_token(Token::else_token()); + auto & result = parser.on_token(Token::rightparen_token()); - log && log("after else token:"); + log && log("after rightparen token:"); log && log(xtag("parser", &parser)); log && log(xtag("result", result)); @@ -340,6 +339,7 @@ namespace xo { REQUIRE(result.is_incomplete()); } +#ifdef NOT_YET { auto & result = parser.on_token(Token::string_token("fooey"));