xo-reader2 stack: progress towards recognizing function calls [WIP]

This commit is contained in:
Roland Conybeare 2026-02-10 23:28:20 -05:00
commit 00dc45db9f
9 changed files with 234 additions and 37 deletions

View file

@ -27,11 +27,11 @@ namespace xo {
static DExpectExprSsm * make(DArena & parser_mm, static DExpectExprSsm * make(DArena & parser_mm,
bool allow_defs, bool allow_defs,
bool cxl_on_rightparen); bool cxl_on_rightbrace);
static void start(DArena & parser_mm, static void start(DArena & parser_mm,
bool allow_defs, bool allow_defs,
bool cxl_on_rightparen, bool cxl_on_rightbrace,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
static void start(ParserStateMachine * p_psm); static void start(ParserStateMachine * p_psm);

View file

@ -87,6 +87,12 @@ namespace xo {
void on_leftparen_token(const Token & tk, void on_leftparen_token(const Token & tk,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
/** update ssm state for incoming rightparen token @p tk
* with overall parser state in @p p_psm
**/
void on_rightparen_token(const Token & tk,
ParserStateMachine * p_psm);
///@} ///@}
/** @defgroup scm-parenssm-ssm-facet syntaxstatemachine facet methods **/ /** @defgroup scm-parenssm-ssm-facet syntaxstatemachine facet methods **/
///@{ ///@{
@ -101,6 +107,12 @@ namespace xo {
void on_token(const Token & tk, void on_token(const Token & tk,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
/** update ssm for expression @p expr (emitted by nested ssm),
* with overall parser state in @p p_psm
**/
void on_parsed_expression(obj<AExpression> expr,
ParserStateMachine * p_psm);
///@} ///@}
/** @defgroup scm-parenssm-printable-facet printable facet methods **/ /** @defgroup scm-parenssm-printable-facet printable facet methods **/
///@{ ///@{

View file

@ -100,6 +100,8 @@ namespace xo {
obj<AExpression> lhs, obj<AExpression> lhs,
optype op); optype op);
/** start a progress-ssm that does not yet hold a lhs expression.
 *  Delegates to the lhs-taking overload with an empty expression handle.
 *  Uses @p parser_mm for memory, with overall parser state in @p p_psm
 **/
static void start(DArena & parser_mm,
ParserStateMachine * p_psm);
static void start(DArena & parser_mm, static void start(DArena & parser_mm,
obj<AExpression> lhs, obj<AExpression> lhs,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
@ -132,21 +134,9 @@ namespace xo {
/** @defgroup scm-progressssm-methods general methods **/ /** @defgroup scm-progressssm-methods general methods **/
///@{ ///@{
/** token belongs to surrounding syntax, /** handle leftparen token @p tk. Overall parser state in @p p_psm **/
* -> lock in current progress void on_leftparen_token(const Token & tk,
**/ ParserStateMachine * p_psm);
void on_completing_token(const Token & tk,
ParserStateMachine * p_psm);
///@}
/** @defgroup scm-progressssm-ssm-facet syntaxstatemachine facet methods **/
/// @{
/** operate state machine for this syntax on incoming token @p tk
* with overall parser state in @p p_psm
**/
void on_token(const Token & tk,
ParserStateMachine * p_psm);
void on_symbol_token(const Token & tk, void on_symbol_token(const Token & tk,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
@ -168,6 +158,25 @@ namespace xo {
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
void on_rightbrace_token(const Token & tk, void on_rightbrace_token(const Token & tk,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);
/** token belongs to surrounding syntax,
* -> lock in current progress
**/
void on_completing_token(const Token & tk,
ParserStateMachine * p_psm);
///@}
/** @defgroup scm-progressssm-ssm-facet syntaxstatemachine facet methods **/
/// @{
/** operate state machine for this syntax on incoming token @p tk
* with overall parser state in @p p_psm
**/
void on_token(const Token & tk,
ParserStateMachine * p_psm);
/** update ssm for expression @p expr, with overall parser state in @p p_psm.
 *  When no lhs expression is held yet, @p expr is stored as lhs;
 *  otherwise it is forwarded to the default (Super) handling.
 **/
void on_parsed_expression(obj<AExpression> expr,
ParserStateMachine * p_psm);
void on_parsed_expression_with_token(obj<AExpression> expr, void on_parsed_expression_with_token(obj<AExpression> expr,
const Token & tk, const Token & tk,
ParserStateMachine * p_psm); ParserStateMachine * p_psm);

View file

@ -0,0 +1,12 @@
/** @file ProgressSsm.hpp
*
* @author Roland Conybeare, Feb 2026
**/
#pragma once
#include "DProgressSsm.hpp"
#include "ssm/ISyntaxStateMachine_DProgressSsm.hpp"
#include "ssm/IPrintable_DProgressSsm.hpp"
/* end ProgressSsm.hpp */

View file

@ -7,7 +7,7 @@
#include "ssm/ISyntaxStateMachine_DExprSeqState.hpp" #include "ssm/ISyntaxStateMachine_DExprSeqState.hpp"
#include "DDefineSsm.hpp" #include "DDefineSsm.hpp"
#include "DLambdaSsm.hpp" #include "DLambdaSsm.hpp"
#include "DProgressSsm.hpp" #include "ProgressSsm.hpp"
#include "DIfElseSsm.hpp" #include "DIfElseSsm.hpp"
#include "ParenSsm.hpp" #include "ParenSsm.hpp"
#include "ExpectExprSsm.hpp" #include "ExpectExprSsm.hpp"
@ -400,7 +400,13 @@ namespace xo {
{ {
switch (seqtype_) { switch (seqtype_) {
case exprseqtype::toplevel_interactive: { case exprseqtype::toplevel_interactive: {
DParenSsm::start(p_psm); // not sufficient to just start a paren-ssm here.
// we want to parse toplevel input like
// (getfunction())();
// just as C would.
// To wait for token following right paren, use a progress-ssm
DProgressSsm::start(p_psm->parser_alloc(), p_psm);
p_psm->on_token(Token::leftparen_token()); p_psm->on_token(Token::leftparen_token());
return; return;

View file

@ -4,6 +4,7 @@
**/ **/
#include "ParenSsm.hpp" #include "ParenSsm.hpp"
#include "ExpectExprSsm.hpp"
#include "syntaxstatetype.hpp" #include "syntaxstatetype.hpp"
#include <string_view> #include <string_view>
@ -70,9 +71,9 @@ namespace xo {
case parenexprstatetype::invalid: case parenexprstatetype::invalid:
case parenexprstatetype::N: case parenexprstatetype::N:
break; break;
case parenexprstatetype::lparen_0: return "lparen_0"; case parenexprstatetype::lparen_0: return "leftparen";
case parenexprstatetype::lparen_1: return "lparen_1"; case parenexprstatetype::lparen_1: return "expression";
case parenexprstatetype::lparen_2: return "lparen_2"; case parenexprstatetype::lparen_2: return "rightparen";
} }
return "???parenexprstatetype"; return "???parenexprstatetype";
@ -83,9 +84,15 @@ namespace xo {
ParserStateMachine * p_psm) ParserStateMachine * p_psm)
{ {
switch (tk.tk_type()) { switch (tk.tk_type()) {
case tokentype::tk_leftparen: case tokentype::tk_leftparen:
this->on_leftparen_token(tk, p_psm); this->on_leftparen_token(tk, p_psm);
return; return;
case tokentype::tk_rightparen:
this->on_rightparen_token(tk, p_psm);
return;
// all the not-yet handled cases // all the not-yet handled cases
case tokentype::tk_symbol: case tokentype::tk_symbol:
case tokentype::tk_def: case tokentype::tk_def:
@ -98,7 +105,6 @@ namespace xo {
case tokentype::tk_i64: case tokentype::tk_i64:
case tokentype::tk_bool: case tokentype::tk_bool:
case tokentype::tk_if: case tokentype::tk_if:
case tokentype::tk_rightparen:
case tokentype::tk_leftbracket: case tokentype::tk_leftbracket:
case tokentype::tk_rightbracket: case tokentype::tk_rightbracket:
case tokentype::tk_leftbrace: case tokentype::tk_leftbrace:
@ -136,6 +142,25 @@ namespace xo {
DParenSsm::on_leftparen_token(const Token & tk, DParenSsm::on_leftparen_token(const Token & tk,
ParserStateMachine * p_psm) ParserStateMachine * p_psm)
{ {
if (parenstate_ == parenexprstatetype::lparen_0) {
this->parenstate_ = parenexprstatetype::lparen_1;
/** 1. allow_defs=false: definitions are not allowed immediately
 *     within a parenthesized expression.
 *     e.g.
 *       (def y : i64 = 4; x + y) // nope
 *  2. cxl_on_rightbrace=false: expression _must_ be followed
 *     by rightparen. Empty parentheses '()'
 *     do not denote anything in expression context.
 *     NOTE(review): this flag was previously named cxl_on_rightparen;
 *     confirm the explanation still matches the renamed parameter.
 **/
DExpectExprSsm::start(p_psm->parser_alloc(),
false /*!allow_defs*/,
false /*cx_on_rightbrace*/,
p_psm);
return;
}
Super::on_token(tk, p_psm); Super::on_token(tk, p_psm);
} }
@ -255,7 +280,24 @@ namespace xo {
this->illegal_input_error(c_self_name, tk); this->illegal_input_error(c_self_name, tk);
} }
#endif
void
DParenSsm::on_rightparen_token(const Token & tk,
ParserStateMachine * p_psm)
{
if (this->parenstate_ == parenexprstatetype::lparen_2) {
// parenthesized expression successfully parsed
p_psm->pop_ssm();
p_psm->on_parsed_expression(this->expr_);
return;
}
Super::on_token(tk, p_psm);
}
#ifdef NOT_YET
void void
paren_xs::on_rightparen_token(const token_type & tk, paren_xs::on_rightparen_token(const token_type & tk,
parserstatemachine * p_psm) parserstatemachine * p_psm)
@ -302,7 +344,24 @@ namespace xo {
this->illegal_input_error(c_self_name, tk); this->illegal_input_error(c_self_name, tk);
} }
#endif
/** Receive expression @p expr, with overall parser state in @p p_psm.
 *
 *  In state lparen_1 the expression is remembered in expr_ and the
 *  state advances to lparen_2. In any other state the expression is
 *  handed to Super::on_parsed_expression.
 **/
void
DParenSsm::on_parsed_expression(obj<AExpression> expr,
                                ParserStateMachine * p_psm)
{
    const bool awaiting_expr
        = (parenstate_ == parenexprstatetype::lparen_1);

    if (!awaiting_expr) {
        // not in a state that accepts an expression
        Super::on_parsed_expression(expr, p_psm);
        return;
    }

    // remember the expression; the state records that it is now held
    this->expr_ = expr;
    this->parenstate_ = parenexprstatetype::lparen_2;
}
#ifdef NOT_YET
void void
paren_xs::on_expr(bp<Expression> expr, paren_xs::on_expr(bp<Expression> expr,
parserstatemachine * p_psm) parserstatemachine * p_psm)

View file

@ -9,6 +9,8 @@
#include "DExpectExprSsm.hpp" #include "DExpectExprSsm.hpp"
#include "ssm/ISyntaxStateMachine_DExpectExprSsm.hpp" #include "ssm/ISyntaxStateMachine_DExpectExprSsm.hpp"
#include "ParenSsm.hpp"
#include <xo/expression2/DApplyExpr.hpp> #include <xo/expression2/DApplyExpr.hpp>
#include <xo/expression2/detail/IExpression_DApplyExpr.hpp> #include <xo/expression2/detail/IExpression_DApplyExpr.hpp>
@ -185,6 +187,13 @@ namespace xo {
start(parser_mm, lhs, optype::invalid, p_psm); start(parser_mm, lhs, optype::invalid, p_psm);
} }
/** Start a progress-ssm without a lhs expression:
 *  forwards to the lhs-taking overload with an empty expression handle.
 *  Memory from @p parser_mm, overall parser state in @p p_psm.
 **/
void
DProgressSsm::start(DArena & parser_mm,
                    ParserStateMachine * p_psm)
{
    // empty handle: no lhs expression available yet
    obj<AExpression> no_lhs{};

    start(parser_mm, no_lhs, p_psm);
}
DProgressSsm::DProgressSsm(obj<AExpression> valex, DProgressSsm::DProgressSsm(obj<AExpression> valex,
optype op) optype op)
: lhs_{valex}, : lhs_{valex},
@ -204,10 +213,12 @@ namespace xo {
std::string_view std::string_view
DProgressSsm::get_expect_str() const noexcept { DProgressSsm::get_expect_str() const noexcept {
if (op_type_ == optype::invalid) { if (!lhs_) {
return "expr1|leftparen";
} else if (op_type_ == optype::invalid) {
return "oper|semicolon|rightparen|righbrace"; return "oper|semicolon|rightparen|righbrace";
} else { } else {
return "expr|leftparen"; return "expr2|leftparen";
} }
} }
@ -259,11 +270,14 @@ namespace xo {
this->on_rightbrace_token(tk, p_psm); this->on_rightbrace_token(tk, p_psm);
return; return;
case tokentype::tk_leftparen:
this->on_leftparen_token(tk, p_psm);
return;
// all the not-yet handled cases // all the not-yet handled cases
case tokentype::tk_invalid: case tokentype::tk_invalid:
case tokentype::tk_def: case tokentype::tk_def:
case tokentype::tk_if: case tokentype::tk_if:
case tokentype::tk_leftparen:
case tokentype::tk_rightparen: case tokentype::tk_rightparen:
case tokentype::tk_leftbracket: case tokentype::tk_leftbracket:
case tokentype::tk_rightbracket: case tokentype::tk_rightbracket:
@ -483,13 +497,48 @@ namespace xo {
p_psm->on_parsed_expression_with_token(expr, tk); p_psm->on_parsed_expression_with_token(expr, tk);
} }
/** Receive expression @p expr, with overall parser state in @p p_psm.
 *
 *  When no lhs expression is held yet, @p expr becomes lhs_.
 *  Otherwise the expression is handed to Super::on_parsed_expression.
 **/
void
DProgressSsm::on_parsed_expression(obj<AExpression> expr,
                                   ParserStateMachine * p_psm)
{
    // honour the parser's debug setting; do not force logging on
    const bool c_debug_flag = p_psm->debug_flag();

    scope log(XO_DEBUG(c_debug_flag));

    if (!lhs_) {
        log && log("accepting expr1");

        this->lhs_ = expr;
        return;
    }

    Super::on_parsed_expression(expr, p_psm);
}
void void
DProgressSsm::on_parsed_expression_with_token(obj<AExpression> expr, DProgressSsm::on_parsed_expression_with_token(obj<AExpression> expr,
const Token & tk, const Token & tk,
ParserStateMachine * p_psm) ParserStateMachine * p_psm)
{ {
scope log(XO_DEBUG(p_psm->debug_flag()), const bool c_debug_flag = p_psm->debug_flag() || true;
xtag("expr", expr));
scope log(XO_DEBUG(c_debug_flag),
xtag("expr", expr),
xtag("tk", tk));
#ifdef NOT_YET
if (!lhs_) {
log && log("DProgressSsm: accepting expr1");
this->lhs_ = expr;
// now we have to handle tk!
return;
}
#endif
// here: have lhs_ expression
if (op_type_ == optype::invalid) { if (op_type_ == optype::invalid) {
// e.g. control here on input like // e.g. control here on input like
@ -499,6 +548,7 @@ namespace xo {
("DProgressSsm::on_parsed_expression_with_token", ("DProgressSsm::on_parsed_expression_with_token",
expr, expr,
this->get_expect_str()); this->get_expect_str());
return; return;
} }
@ -847,7 +897,32 @@ namespace xo {
{ {
this->on_operator_token(tk, p_psm); this->on_operator_token(tk, p_psm);
} }
#endif
void
DProgressSsm::on_leftparen_token(const Token & tk,
ParserStateMachine * p_psm)
{
if (!lhs_) {
// leftparen begins possible lhs expression
DParenSsm::start(p_psm);
p_psm->on_token(Token::leftparen_token());
return;
}
if (optype_ == optype::invalid) {
// leftparen begins function call arguments.
// .lhs_ now understood to be expression that evaluates to a
// function
}
Super::on_token(tk, p_psm);
}
#ifdef NOT_YET
/* editor bait: on_lparen */ /* editor bait: on_lparen */
void void
progress_xs::on_leftparen_token(const token_type & tk, progress_xs::on_leftparen_token(const token_type & tk,
@ -885,8 +960,8 @@ namespace xo {
return; return;
} }
constexpr const char * c_self_name = "exprstate::on_leftparen";
const char * exp = get_expect_str(); const char * exp = get_expect_str();
constexpr const char * c_self_name = "exprstate::on_leftparen";
this->illegal_input_on_token(c_self_name, tk, exp, p_psm); this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
} }
@ -1023,18 +1098,20 @@ namespace xo {
log && log(xtag("rhs_.tseq", rhs_._typeseq())); log && log(xtag("rhs_.tseq", rhs_._typeseq()));
obj<APrintable> lhs obj<APrintable> lhs
= FacetRegistry::instance().variant<APrintable,AExpression>(lhs_); = FacetRegistry::instance().try_variant<APrintable,AExpression>(lhs_);
obj<APrintable> rhs obj<APrintable> rhs
= FacetRegistry::instance().try_variant<APrintable,AExpression>(rhs_); = FacetRegistry::instance().try_variant<APrintable,AExpression>(rhs_);
bool lhs_present = lhs;
bool rhs_present = rhs; bool rhs_present = rhs;
bool op_present = (op_type_ != optype::invalid);
return ppii.pps()->pretty_struct return ppii.pps()->pretty_struct
(ppii, (ppii,
"DProgressSsm", "DProgressSsm",
refrtag("lhs", lhs), refrtag("lhs", lhs, lhs_present),
refrtag("op", op_type_), refrtag("op", op_type_, op_present),
refrtag("rhs", rhs, rhs_present), refrtag("rhs", rhs, rhs_present),
refrtag("expect", this->get_expect_str()) refrtag("expect", this->get_expect_str())
); );

View file

@ -426,8 +426,8 @@ namespace xo {
// - want to write error message using DArena // - want to write error message using DArena
// - need something like log_streambuf and/or tostr() that's arena-aware // - need something like log_streambuf and/or tostr() that's arena-aware
obj<APrintable> expr_pr auto expr_pr = expr.to_facet<APrintable>();
= FacetRegistry::instance().variant<APrintable,AExpression>(expr); //= FacetRegistry::instance().variant<APrintable,AExpression>(expr);
assert(expr_pr); assert(expr_pr);
/** TODO /** TODO

View file

@ -1176,12 +1176,34 @@ namespace xo {
log && log(xtag("parser", &parser)); log && log(xtag("parser", &parser));
log && log(xtag("result", result)); log && log(xtag("result", result));
REQUIRE(parser.has_incomplete_expr() == false); REQUIRE(parser.has_incomplete_expr() == true);
REQUIRE(!result.is_error()); REQUIRE(!result.is_error());
REQUIRE(result.is_expression()); REQUIRE(result.is_incomplete());
REQUIRE(result.result_expr());
} }
{
auto & result = parser.on_token(Token::rightparen_token());
log && log("after rightparen token:");
log && log(xtag("parser", &parser));
log && log(xtag("result", result));
REQUIRE(parser.has_incomplete_expr() == true);
REQUIRE(!result.is_error());
REQUIRE(result.is_incomplete());
}
{
auto & result = parser.on_token(Token::leftparen_token());
log && log("after leftparen token:");
log && log(xtag("parser", &parser));
log && log(xtag("result", result));
REQUIRE(parser.has_incomplete_expr() == true);
REQUIRE(!result.is_error());
REQUIRE(result.is_incomplete());
}
} }
} /*namespace ut*/ } /*namespace ut*/
} /*namespace xo*/ } /*namespace xo*/