From 903121037586df083dcb94d4b9443a7cda287a1e Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Mon, 28 Jul 2025 09:12:41 -0400 Subject: [PATCH] xo-expression xo-reader: parser improvements, prep type inf/unify --- include/xo/reader/expect_expr_xs.hpp | 1 + include/xo/reader/if_else_xs.hpp | 7 +++ include/xo/reader/lambda_xs.hpp | 3 ++ include/xo/reader/parserstatemachine.hpp | 13 +++++- src/reader/expect_expr_xs.cpp | 8 ++++ src/reader/if_else_xs.cpp | 38 ++++++++++++---- src/reader/lambda_xs.cpp | 58 +++++++++++++++++++++++- src/reader/parser.cpp | 3 +- src/reader/parserstatemachine.cpp | 10 ++++ 9 files changed, 127 insertions(+), 14 deletions(-) diff --git a/include/xo/reader/expect_expr_xs.hpp b/include/xo/reader/expect_expr_xs.hpp index bfca1603..b4c8b388 100644 --- a/include/xo/reader/expect_expr_xs.hpp +++ b/include/xo/reader/expect_expr_xs.hpp @@ -77,6 +77,7 @@ namespace xo { parserstatemachine * p_psm) override; virtual void print(std::ostream & os) const override; + virtual bool pretty_print(const xo::print::ppindentinfo & ppii) const override; private: static std::unique_ptr make(bool allow_defs, diff --git a/include/xo/reader/if_else_xs.hpp b/include/xo/reader/if_else_xs.hpp index 6086eac0..04efb75c 100644 --- a/include/xo/reader/if_else_xs.hpp +++ b/include/xo/reader/if_else_xs.hpp @@ -73,6 +73,8 @@ namespace xo { parserstatemachine * p_psm) override; virtual void on_semicolon_token(const token_type & tk, parserstatemachine * p_psm) override; + virtual void on_rightbrace_token(const token_type & tk, + parserstatemachine * p_psm) override; virtual void on_expr(bp expr, parserstatemachine * p_psm) override; @@ -84,6 +86,11 @@ namespace xo { private: static std::unique_ptr make(); + /** exit this exprstate, + * and deliver @ref if_expr_ to parent exprstate + **/ + void finish_and_continue(parserstatemachine * p_psm); + private: ifexprstatetype ifxs_type_; /** scaffold output expression here **/ diff --git a/include/xo/reader/lambda_xs.hpp 
b/include/xo/reader/lambda_xs.hpp index 2eb39e2a..b51ac191 100644 --- a/include/xo/reader/lambda_xs.hpp +++ b/include/xo/reader/lambda_xs.hpp @@ -98,6 +98,9 @@ namespace xo { /** explicit return type (if supplied) **/ TypeDescr explicit_return_td_ = nullptr; + /** lambda signature (when known) **/ + TypeDescr lambda_td_ = nullptr; + /** body expression **/ rp body_; diff --git a/include/xo/reader/parserstatemachine.hpp b/include/xo/reader/parserstatemachine.hpp index 2b8340e9..d243bbe2 100644 --- a/include/xo/reader/parserstatemachine.hpp +++ b/include/xo/reader/parserstatemachine.hpp @@ -9,6 +9,7 @@ #include "exprstatestack.hpp" #include "envframestack.hpp" #include "parser_result.hpp" +#include "xo/expression/typeinf/type_unifier.hpp" namespace xo { namespace scm { @@ -39,7 +40,11 @@ namespace xo { std::unique_ptr pop_exprstate(); exprstate & top_exprstate(); + /** get exprstate @p i levels from the top **/ + const exprstate & lookup_exprstate(size_t i) const; void push_exprstate(std::unique_ptr x); + /** @return number of stacked expression states **/ + size_t exprstate_stack_size() const { return xs_stack_.size(); } bool debug_flag() const { return debug_flag_; } @@ -55,6 +60,8 @@ namespace xo { /** @return available variable bindings in current parsing state **/ bp top_envframe() const; + /** @return frame @p i levels from the top **/ + bp lookup_envframe(std::size_t i) const; /** push frame @p x (with new variable bindings) onto environment stack **/ void push_envframe(const rp & x); /** @return pop innermost environment frame and return it **/ @@ -102,7 +109,11 @@ namespace xo { * pop when lambda body goes out of scope **/ envframestack env_stack_; - /** parser result state **/ + /** type inference/unification. + * apply to unresolved types associated with @ref Expression instances. 
+ **/ + type_unifier unifier_; + /** parser result state (as of last call to @ref exprstate::on_input) **/ parser_result result_; /** enable/disable debug logging **/ bool debug_flag_ = false; diff --git a/src/reader/expect_expr_xs.cpp b/src/reader/expect_expr_xs.cpp index 68f5a12d..37a33bde 100644 --- a/src/reader/expect_expr_xs.cpp +++ b/src/reader/expect_expr_xs.cpp @@ -261,6 +261,14 @@ namespace xo { << ">"; } + bool + expect_expr_xs::pretty_print(const xo::print::ppindentinfo & ppii) const + { + return ppii.pps()->pretty_struct(ppii, "expect_expr_xs", + refrtag("allow_defs", allow_defs_), + refrtag("cxl_on_rightbrace", cxl_on_rightbrace_)); + } + } /*namespace scm*/ } /*namespace xo*/ diff --git a/src/reader/if_else_xs.cpp b/src/reader/if_else_xs.cpp index 2fada024..01d142e0 100644 --- a/src/reader/if_else_xs.cpp +++ b/src/reader/if_else_xs.cpp @@ -164,6 +164,30 @@ namespace xo { this->illegal_input_on_token(c_self_name, tk, exp, p_psm); } + void + if_else_xs::finish_and_continue(parserstatemachine * p_psm) + { + rp if_expr = this->if_expr_; + std::unique_ptr self = p_psm->pop_exprstate(); + + if (this->ifxs_type_ == ifexprstatetype::if_4) { + /* if no else-branch, then if-expr can't have valuetype */ + if_expr->assign_valuetype(nullptr); + } + + p_psm->top_exprstate().on_expr(if_expr, p_psm); + } + + void + if_else_xs::on_rightbrace_token(const token_type & tk, + parserstatemachine * p_psm) + { + scope log(XO_DEBUG(p_psm->debug_flag())); + + this->finish_and_continue(p_psm); + p_psm->on_rightbrace_token(tk); + } + void if_else_xs::on_semicolon_token(const token_type & tk, parserstatemachine * p_psm) @@ -180,24 +204,18 @@ namespace xo { case ifexprstatetype::n_ifexprstatetype: // unreachable assert(false); - return; + break; case ifexprstatetype::if_1: case ifexprstatetype::if_2: case ifexprstatetype::if_3: case ifexprstatetype::if_5: this->illegal_input_on_token(c_self_name, tk, get_expect_str(), p_psm); - return; + break; case ifexprstatetype::if_4: case 
ifexprstatetype::if_6: { - rp if_expr = this->if_expr_; - - std::unique_ptr self = p_psm->pop_exprstate(); - TypeDescr td = nullptr; - - if_expr->assign_valuetype(td); - p_psm->top_exprstate().on_expr(if_expr, p_psm); - return; + this->finish_and_continue(p_psm); + break; } } } diff --git a/src/reader/lambda_xs.cpp b/src/reader/lambda_xs.cpp index bbbd19b8..e1dfe722 100644 --- a/src/reader/lambda_xs.cpp +++ b/src/reader/lambda_xs.cpp @@ -1,6 +1,7 @@ /* @file lambda_xs.cpp */ #include "lambda_xs.hpp" +#include "define_xs.hpp" #include "parserstatemachine.hpp" #include "exprstatestack.hpp" #include "expect_formal_arglist_xs.hpp" @@ -156,9 +157,55 @@ namespace xo { { constexpr const char * c_self_name = "lambda_xs::on_typedescr"; + assert(td); + if (lmxs_type_ == lambdastatetype::lm_3) { this->lmxs_type_ = lambdastatetype::lm_4; this->explicit_return_td_ = td; + + this->lambda_td_ = Lambda::assemble_lambda_td(local_env_->argv(), + explicit_return_td_); + + /* 1. at this point we know function signature (@ref lambda_td_) + * 2. if this lambda appears on the rhs of a define, + * propagate function signature to the define. + * 3. this makes recursive function definitions like this work + * without relying on type inference: + * def fact = lambda (n : i64) : i64 { + * if (n == 0) then + * 1 + * else + * n * fact(n - 1) + * } + * 4. while parsing the body of the lambda, we want environment + * to already associate the lambda's signature with variable 'fact', + * so that when parser encounters 'fact(n - 1)' the expression has + * known valuetype. 
+ */ + + if ((p_psm->exprstate_stack_size() >= 3) + && (p_psm->lookup_exprstate(1).exs_type() == exprstatetype::expect_rhs_expression) + && (p_psm->lookup_exprstate(2).exs_type() == exprstatetype::defexpr) + && (p_psm->env_stack_size() >= 2) + ) + { + bp def_env = p_psm->lookup_envframe(1); + + assert(def_env->n_arg() == 1); + + bp def_var = def_env->lookup_arg(0).get(); + + if (def_var->valuetype() == nullptr) { + def_var->assign_valuetype(lambda_td_); + } else { + /* don't need to unify here. if def already has a type, + * that's because it was explicitly specified. + * will discover any conflict after reporting parsed lambda + * to define_xs + */ + } + } + expect_expr_xs::start(p_psm); /* control reenters via .on_expr() or .on_expr_with_semicolon() */ } else { @@ -194,7 +241,16 @@ namespace xo { /* top env frame recorded arguments to this lambda */ p_psm->pop_envframe(); - rp lm = Lambda::make_from_env(name, local_env_, explicit_return_td_, body_); + rp lm; + + /* TODO: unify explicit_return_td_ with body_ */ + + if (lambda_td_) { + lm = Lambda::make(name, lambda_td_, local_env_, body_); + } else { + lm = Lambda::make_from_env(name, local_env_, + explicit_return_td_, body_); + } p_psm->top_exprstate().on_expr(lm, p_psm); p_psm->top_exprstate().on_semicolon_token(tk, p_psm); diff --git a/src/reader/parser.cpp b/src/reader/parser.cpp index b650f297..fe55aa5f 100644 --- a/src/reader/parser.cpp +++ b/src/reader/parser.cpp @@ -52,8 +52,7 @@ namespace xo { const parser_result & parser::include_token(const token_type & tk) { - constexpr bool c_debug_flag = true; - scope log(XO_DEBUG(c_debug_flag), xtag("tk", tk)); + scope log(XO_DEBUG(psm_.debug_flag()), xtag("tk", tk)); if (psm_.xs_stack_.empty()) { throw std::runtime_error(tostr("parser::include_token", diff --git a/src/reader/parserstatemachine.cpp b/src/reader/parserstatemachine.cpp index b3121cc5..90c2d079 100644 --- a/src/reader/parserstatemachine.cpp +++ b/src/reader/parserstatemachine.cpp @@ -35,6 +35,11 @@
namespace xo { return xs_stack_.top_exprstate(); } + const exprstate & + parserstatemachine::lookup_exprstate(size_t i) const { + return *xs_stack_[i]; + } + void parserstatemachine::push_exprstate(std::unique_ptr x) { xs_stack_.push_exprstate(std::move(x)); @@ -45,6 +50,11 @@ namespace xo { return env_stack_.top_envframe(); } + bp + parserstatemachine::lookup_envframe(std::size_t i) const { + return env_stack_[i]; + } + void parserstatemachine::push_envframe(const rp & x) { constexpr bool c_debug_flag = true;