xo-tokenizer xo-reader: + bool literals + if-expr parsing

This commit is contained in:
Roland Conybeare 2025-07-22 07:20:56 -05:00
commit ce760bd5cf
30 changed files with 848 additions and 74 deletions

View file

@ -9,6 +9,7 @@ set(SELF_SRCS
exprstate.cpp
exprstatestack.cpp
define_xs.cpp
if_else_xs.cpp
progress_xs.cpp
paren_xs.cpp
sequence_xs.cpp

View file

@ -14,7 +14,7 @@ namespace xo {
const char *
defexprstatetype_descr(defexprstatetype x) {
switch(x) {
switch (x) {
case defexprstatetype::invalid: return "invalid";
case defexprstatetype::def_0: return "def_0";
case defexprstatetype::def_1: return "def_1";
@ -208,7 +208,7 @@ namespace xo {
return;
}
constexpr const char * c_self_name = "define_xs::on_symbol";
constexpr const char * c_self_name = "define_xs::on_def_token";
const char * exp = this->get_expect_str();
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);

View file

@ -129,6 +129,8 @@ namespace xo {
log && log(xtag("tk", tk));
constexpr const char * c_self_name = "expect_expr_xs::on_symbol_token";
/* various possibilities when looking for rhs expression:
*
* x := y // (1)
@ -143,10 +145,8 @@ namespace xo {
bp<Variable> var = p_psm->lookup_var(tk.text());
if (!var) {
throw std::runtime_error
(tostr("expect_expr_xs::on_symbol_token",
": unknown symbol",
xtag("name", tk.text())));
this->unknown_variable_error(c_self_name, tk, p_psm);
return;
}
/* e.g.
@ -173,12 +173,22 @@ namespace xo {
return;
}
void
expect_expr_xs::on_bool_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
progress_xs::start
(Constant<bool>::make(tk.bool_value()),
p_psm);
}
void
expect_expr_xs::on_i64_token(const token_type & tk,
parserstatemachine * p_psm)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(p_psm->debug_flag()));
progress_xs::start
(Constant<int64_t>::make(tk.i64_value()),

View file

@ -7,6 +7,7 @@
//#include "expect_expr_xs.hpp"
#include "progress_xs.hpp"
#include "define_xs.hpp"
#include "if_else_xs.hpp"
#include "expect_symbol_xs.hpp"
#include "xo/expression/Constant.hpp"
@ -30,6 +31,24 @@ namespace xo {
{
}
const char *
exprseq_xs::get_expect_str() const
{
/*
* def...
* ^
* exprseq_xs
*/
switch (this->xseqtype_) {
case exprseqtype::toplevel_interactive:
return "def|expression";
case exprseqtype::toplevel_batch:
return "def";
}
return "?expect";
}
void
exprseq_xs::on_def_token(const token_type & /*tk*/,
parserstatemachine * p_psm)
@ -45,6 +64,25 @@ namespace xo {
*/
}
/** Handle an 'if' token at the top level of an expression sequence.
 *
 *  Only an interactive session allows a top-level if-expression,
 *  for example:
 *    if sometest() then do_something() else do_otherthing();
 *  Any other sequence type reports illegal input.
 *
 *  @param tk     the 'if' token (used for error reporting)
 *  @param p_psm  parser state machine
 **/
void
exprseq_xs::on_if_token(const token_type & tk,
                        parserstatemachine * p_psm)
{
    if (xseqtype_ != exprseqtype::toplevel_interactive) {
        constexpr const char * c_self_name = "exprseq_xs::on_if_token";

        this->illegal_input_on_token(c_self_name,
                                     tk,
                                     get_expect_str(),
                                     p_psm);
        return;
    }

    /* interactive session: begin parsing an if-expression */
    if_else_xs::start(p_psm);
}
void
exprseq_xs::on_symbol_token(const token_type & tk,
parserstatemachine * p_psm)
@ -72,7 +110,39 @@ namespace xo {
/* policy: don't allow variable references as toplevel expressions
* unless interactive session
*/
this->illegal_input_error(c_self_name, tk);
const char * exp = get_expect_str();
this->illegal_input_on_token(c_self_name,
tk,
exp,
p_psm);
}
}
/** Handle a boolean literal at the top level of an expression sequence.
 *
 *  In an interactive session the literal is wrapped in a Constant<bool>
 *  and handed to progress_xs::start(). For any other sequence type the
 *  token is reported as illegal input, along with the expectation
 *  string from get_expect_str().
 *
 *  @param tk     the bool token
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
exprseq_xs::on_bool_token(const token_type & tk,
                          parserstatemachine * p_psm)
{
    using xo::ast::Constant;

    /* trace only when the state machine asks for it,
     * instead of unconditionally
     */
    scope log(XO_DEBUG(p_psm->debug_flag()));

    constexpr const char * c_self_name = "exprseq_xs::on_bool_token";

    if (xseqtype_ == exprseqtype::toplevel_interactive)
    {
        progress_xs::start(Constant<bool>::make(tk.bool_value()), p_psm);
    } else {
        /* policy: don't allow literals as toplevel expressions
         * unless interactive session
         */
        const char * exp = get_expect_str();

        this->illegal_input_on_token(c_self_name,
                                     tk,
                                     exp,
                                     p_psm);
    }
}
@ -94,7 +164,12 @@ namespace xo {
/* policy: don't allow literals as toplevel expressions
* unless interactive session.
*/
this->illegal_input_error(c_self_name, tk);
const char * exp = get_expect_str();
this->illegal_input_on_token(c_self_name,
tk,
exp,
p_psm);
}
}
@ -115,7 +190,6 @@ namespace xo {
* arbitrary number of expressions.
*/
*(p_psm->p_result_) = parser_result::expression(expr.promote());
}

View file

@ -40,6 +40,8 @@ namespace xo {
return "sequenceexpr";
case exprstatetype::let1expr:
return "let1expr";
case exprstatetype::ifexpr:
return "ifexpr";
case exprstatetype::expect_rhs_expression:
return "expect_rhs_expression";
case exprstatetype::expect_symbol:
@ -279,6 +281,55 @@ namespace xo {
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
/** Base-class handler for an 'if' token: always reports illegal input.
 *
 *  @param tk     the offending token
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
exprstate::on_if_token(const token_type & tk,
                       parserstatemachine * p_psm)
{
    scope log(XO_DEBUG(p_psm->debug_flag()));

    constexpr const char * c_self_name = "exprstate::on_if_token";

    this->illegal_input_on_token(c_self_name,
                                 tk,
                                 this->get_expect_str(),
                                 p_psm);
}
/** Base-class handler for a 'then' token: always reports illegal input.
 *
 *  @param tk     the offending token
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
exprstate::on_then_token(const token_type & tk,
                         parserstatemachine * p_psm)
{
    scope log(XO_DEBUG(p_psm->debug_flag()));

    constexpr const char * c_self_name = "exprstate::on_then_token";

    this->illegal_input_on_token(c_self_name,
                                 tk,
                                 this->get_expect_str(),
                                 p_psm);
}
/** Base-class handler for an 'else' token: always reports illegal input.
 *
 *  @param tk     the offending token
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
exprstate::on_else_token(const token_type & tk,
                         parserstatemachine * p_psm)
{
    scope log(XO_DEBUG(p_psm->debug_flag()));

    constexpr const char * c_self_name = "exprstate::on_else_token";

    this->illegal_input_on_token(c_self_name,
                                 tk,
                                 this->get_expect_str(),
                                 p_psm);
}
/** Base-class handler for a bool literal token: always reports illegal
 *  input, along with the expectation string from get_expect_str().
 *
 *  @param tk     the offending token
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
exprstate::on_bool_token(const token_type & tk,
                         parserstatemachine * p_psm)
{
    /* trace only when the state machine asks for it,
     * instead of unconditionally
     */
    scope log(XO_DEBUG(p_psm->debug_flag()));

    /* reported name matches the method name, like the sibling
     * on_if_token / on_then_token / on_else_token handlers
     */
    constexpr const char * c_self_name = "exprstate::on_bool_token";
    const char * exp = get_expect_str();

    this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
void
exprstate::on_i64_token(const token_type & tk,
parserstatemachine * p_psm)
@ -326,6 +377,10 @@ namespace xo {
this->on_lambda_token(tk, p_psm);
return;
case tokentype::tk_bool:
this->on_bool_token(tk, p_psm);
return;
case tokentype::tk_i64:
this->on_i64_token(tk, p_psm);
return;
@ -402,7 +457,21 @@ namespace xo {
return;
case tokentype::tk_type:
assert(false);
return;
case tokentype::tk_if:
this->on_if_token(tk, p_psm);
return;
case tokentype::tk_then:
this->on_then_token(tk, p_psm);
return;
case tokentype::tk_else:
this->on_else_token(tk, p_psm);
return;
case tokentype::tk_let:
case tokentype::tk_in:

View file

@ -89,7 +89,7 @@ namespace xo {
/** always multiple lines if more than one element in stack **/
if ((stack_.size() > 0)
&& !pps->print_upto_tag("[0]", *stack_[0].get()))
&& !pps->print_upto_tag("[0]", stack_[0].get()))
{
return false;
}
@ -105,7 +105,7 @@ namespace xo {
for (std::size_t i = 0, z = stack_.size(); i < z; ++i) {
std::string i_str = tostr("[", z-i-1, "]");
pps->newline_pretty_tag(ppii.ci1(), i_str, *stack_[i].get());
pps->newline_pretty_tag(ppii.ci1(), i_str, stack_[i].get());
}
pps->write(">");

View file

@ -0,0 +1,269 @@
/** @file if_else_xs.cpp
*
* author: Roland Conybeare, Jul 2025
**/
#include "if_else_xs.hpp"
//#include "exprstatestack.hpp"
#include "parserstatemachine.hpp"
#include "expect_expr_xs.hpp"
namespace xo {
namespace scm {
// ----- ifexprstatetype -----
/** Printable name for an ifexprstatetype value.
 *
 *  @return the enumerator's name, or "???ifexprstatetype" for the
 *          n_ifexprstatetype sentinel and for any unlisted value.
 **/
const char *
ifexprstatetype_descr(ifexprstatetype x) {
    const char * descr = "???ifexprstatetype";

    switch (x) {
    case ifexprstatetype::invalid: descr = "invalid"; break;
    case ifexprstatetype::if_0:    descr = "if_0";    break;
    case ifexprstatetype::if_1:    descr = "if_1";    break;
    case ifexprstatetype::if_2:    descr = "if_2";    break;
    case ifexprstatetype::if_3:    descr = "if_3";    break;
    case ifexprstatetype::if_4:    descr = "if_4";    break;
    case ifexprstatetype::if_5:    descr = "if_5";    break;
    case ifexprstatetype::if_6:    descr = "if_6";    break;
    case ifexprstatetype::n_ifexprstatetype:
        /* sentinel: keep the fallback text */
        break;
    }

    return descr;
}
/** Stream an ifexprstatetype using its printable name
 *  (see ifexprstatetype_descr()).
 **/
std::ostream &
operator<<(std::ostream & os, ifexprstatetype x) {
    return os << ifexprstatetype_descr(x);
}
// ----- if_else_xs -----
std::unique_ptr<if_else_xs>
if_else_xs::make() {
return std::make_unique<if_else_xs>(if_else_xs(IfExprAccess::make_empty()));
}
/** Begin parsing an if-expression.
 *
 *  Pushes a fresh if_else_xs onto the state machine's expression-state
 *  stack, then delivers a synthetic 'if' token to whatever state is on
 *  top of the stack afterwards.
 *
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
if_else_xs::start(parserstatemachine * p_psm)
{
    /* trace only when the state machine asks for it,
     * instead of unconditionally
     */
    scope log(XO_DEBUG(p_psm->debug_flag()));

    p_psm->push_exprstate(if_else_xs::make());
    p_psm->top_exprstate().on_if_token(token_type::if_token(), p_psm);
}
/** Construct in state if_0, taking over the if-expression under
 *  construction.
 *
 *  @param if_expr  expression object that the parsed pieces will be
 *                  assigned to
 **/
if_else_xs::if_else_xs(rp<IfExprAccess> if_expr)
    : exprstate(exprstatetype::ifexpr),
      ifxs_type_(ifexprstatetype::if_0),
      if_expr_(std::move(if_expr))
{
}
/** Describe, for error messages, the input expected in the current
 *  if-expression parsing state.
 *
 *  @return a non-null C string. States that should never be observed
 *          here (invalid, if_0, n_ifexprstatetype) assert in debug
 *          builds and yield "?expect" when assertions are compiled out.
 **/
const char *
if_else_xs::get_expect_str() const
{
    /**
     *   if test-expr then then-expr else else-expr ;
     *  ^  ^         ^    ^         ^    ^         ^
     *  |  |         |    |         |    |         |
     *  |  if_1      if_2 if_3      if_4 if_5      if_6
     *  if_0
     *
     *   if_0 --on_if_token()--> if_1
     *   if_1 --on_expr()--> if_2
     *   if_2 --on_then_token()--> if_3
     *   if_3 --on_expr()--> if_4
     *   if_4 --on_else_token()--> if_5
     *        --on_semicolon_token()--> (done)
     *   if_5 --on_expr()--> if_6
     *   if_6 --on_semicolon_token()--> (done)
     **/
    switch (this->ifxs_type_) {
    case ifexprstatetype::invalid:
    case ifexprstatetype::if_0:
    case ifexprstatetype::n_ifexprstatetype:
        assert(false); // unreachable
        /* with NDEBUG, fall through to the placeholder below rather
         * than returning nullptr: callers pass the result on to the
         * error reporter as a C string
         */
        break;
    case ifexprstatetype::if_1:
        return "expression";
    case ifexprstatetype::if_2:
        return "then";
    case ifexprstatetype::if_3:
        return "expression";
    case ifexprstatetype::if_4:
        return "else|semicolon";
    case ifexprstatetype::if_5:
        return "expression";
    case ifexprstatetype::if_6:
        return "semicolon";
    }

    return "?expect";
}
void
if_else_xs::on_if_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
log && log("ifxs_type", ifxs_type_);
if (this->ifxs_type_ == ifexprstatetype::if_0) {
this->ifxs_type_ = ifexprstatetype::if_1;
expect_expr_xs::start(p_psm);
return;
}
constexpr const char * c_self_name = "if_else_xs::on_if_token";
const char * exp = this->get_expect_str();
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
void
if_else_xs::on_then_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
log && log("ifxs_type", ifxs_type_);
if (this->ifxs_type_ == ifexprstatetype::if_2) {
this->ifxs_type_ = ifexprstatetype::if_3;
expect_expr_xs::start(p_psm);
return;
}
constexpr const char * c_self_name = "if_else_xs::on_then_token";
const char * exp = this->get_expect_str();
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
void
if_else_xs::on_else_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
log && log("ifxs_type", ifxs_type_);
if (this->ifxs_type_ == ifexprstatetype::if_4) {
this->ifxs_type_ = ifexprstatetype::if_5;
expect_expr_xs::start(p_psm);
return;
}
constexpr const char * c_self_name = "if_else_xs::on_else_token";
const char * exp = this->get_expect_str();
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
void
if_else_xs::on_semicolon_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
log && log("ifxs_type", ifxs_type_);
const char * c_self_name = "if_else_xs::on_semicolon_token";
switch (this->ifxs_type_) {
case ifexprstatetype::invalid:
case ifexprstatetype::if_0:
case ifexprstatetype::n_ifexprstatetype:
// unreachable
assert(false);
return;
case ifexprstatetype::if_1:
case ifexprstatetype::if_2:
case ifexprstatetype::if_3:
case ifexprstatetype::if_5:
this->illegal_input_on_token(c_self_name, tk, get_expect_str(), p_psm);
return;
case ifexprstatetype::if_4:
case ifexprstatetype::if_6: {
rp<IfExprAccess> if_expr = this->if_expr_;
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
TypeDescr td = nullptr;
if_expr->assign_valuetype(td);
p_psm->top_exprstate().on_expr(if_expr, p_psm);
return;
}
}
}
/** Receive a completed sub-expression.
 *
 *  State if_1: store as the test expression, advance to if_2.
 *  State if_3: store as the then-branch, advance to if_4.
 *  State if_5: store as the else-branch, advance to if_6.
 *  States if_2, if_4, if_6: an expression is not expected, so it is
 *  reported as illegal input.
 *
 *  @param expr   the expression just parsed
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
if_else_xs::on_expr(bp<Expression> expr,
                    parserstatemachine * p_psm)
{
    scope log(XO_DEBUG(p_psm->debug_flag()));

    log && log(xtag("ifxs_type", ifxs_type_));

    switch (this->ifxs_type_) {
    /* -- states that accept an expression -- */
    case ifexprstatetype::if_1:
        if_expr_->assign_test(expr.promote());
        ifxs_type_ = ifexprstatetype::if_2;
        return;
    case ifexprstatetype::if_3:
        if_expr_->assign_when_true(expr.promote());
        ifxs_type_ = ifexprstatetype::if_4;
        return;
    case ifexprstatetype::if_5:
        if_expr_->assign_when_false(expr.promote());
        ifxs_type_ = ifexprstatetype::if_6;
        return;

    /* -- states expecting a token instead: error reported below -- */
    case ifexprstatetype::if_2:   /* expecting 'then' */
    case ifexprstatetype::if_4:   /* expecting 'else' or ';' */
    case ifexprstatetype::if_6:   /* expecting ';' */
        break;

    /* -- states that should never be observed here -- */
    case ifexprstatetype::invalid:
    case ifexprstatetype::if_0:
    case ifexprstatetype::n_ifexprstatetype:
        assert(false); // unreachable
        return;
    }

    constexpr const char * c_self_name = "if_else_xs::on_expr";

    this->illegal_input_on_expr(c_self_name,
                                expr,
                                get_expect_str(),
                                p_psm);
}
/** Receive a completed sub-expression that was terminated by ';'.
 *
 *  Equivalent to on_expr() followed by on_semicolon_token() with a
 *  synthetic semicolon token.
 *
 *  @param expr   the expression just parsed
 *  @param p_psm  parser state machine; also supplies the debug flag
 **/
void
if_else_xs::on_expr_with_semicolon(bp<Expression> expr,
                                   parserstatemachine * p_psm)
{
    scope log(XO_DEBUG(p_psm->debug_flag()));

    log && log(xtag("ifxs_type", ifxs_type_));

    /* step 1: consume the expression */
    this->on_expr(expr, p_psm);

    /* step 2: consume the semicolon that followed it.
     * Nothing may touch *this after this call: in states if_4 / if_6
     * on_semicolon_token() pops this state off the stack.
     */
    const auto & semicolon_tk = token_type::semicolon();
    this->on_semicolon_token(semicolon_tk, p_psm);
}
/** Write a short description of this state to @p os: the object's
 *  address and the current if-expression parsing state.
 **/
void
if_else_xs::print(std::ostream & os) const {
    /* named cast keeps constness; the C-style (void*) cast silently
     * dropped it in this const member function
     */
    os << "<if_else_xs"
       << xtag("this", static_cast<const void *>(this))
       << xtag("ifxs_type", ifxs_type_)
       << ">";
}
} /*namespace scm*/
} /*namespace xo*/

View file

@ -29,8 +29,8 @@ namespace xo {
namespace scm {
// ----- parser -----
parser::parser()
: xs_stack_{}, env_stack_{}, result_{}
parser::parser(bool debug_flag)
: xs_stack_{}, env_stack_{}, result_{}, debug_flag_{debug_flag}
{
/* top-level environment. initially empty */
rp<LocalEnv> toplevel_env = LocalEnv::make_empty();
@ -48,7 +48,8 @@ namespace xo {
parser::begin_interactive_session() {
parserstatemachine psm(&xs_stack_,
&env_stack_,
&result_);
&result_,
debug_flag_);
exprseq_xs::start(exprseqtype::toplevel_interactive, &psm);
}
@ -57,7 +58,8 @@ namespace xo {
parser::begin_translation_unit() {
parserstatemachine psm(&xs_stack_,
&env_stack_,
&result_);
&result_,
debug_flag_);
exprseq_xs::start(exprseqtype::toplevel_batch, &psm);
}
@ -79,7 +81,7 @@ namespace xo {
log && log(xtag("top", xs_stack_.top_exprstate()));
parserstatemachine psm(&xs_stack_, &env_stack_, &result_);
parserstatemachine psm(&xs_stack_, &env_stack_, &result_, debug_flag_);
xs_stack_.top_exprstate().on_input(tk, &psm);

View file

@ -146,8 +146,7 @@ namespace xo {
void
parserstatemachine::on_rightbrace_token(const token_type & tk)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(debug_flag_));
log && log(xtag("tk", tk),
xtag("psm", *this));
@ -156,6 +155,30 @@ namespace xo {
->top_exprstate().on_rightbrace_token(tk, this);
}
/** Forward a 'then' token to the expression state on top of the stack.
 *
 *  @param tk  the 'then' token
 **/
void
parserstatemachine::on_then_token(const token_type & tk)
{
    scope log(XO_DEBUG(debug_flag_));

    log && log(xtag("tk", tk), xtag("psm", *this));

    auto && top_state = this->p_stack_->top_exprstate();

    top_state.on_then_token(tk, this);
}
/** Forward an 'else' token to the expression state on top of the stack.
 *
 *  @param tk  the 'else' token
 **/
void
parserstatemachine::on_else_token(const token_type & tk)
{
    scope log(XO_DEBUG(debug_flag_));

    log && log(xtag("tk", tk), xtag("psm", *this));

    auto && top_state = this->p_stack_->top_exprstate();

    top_state.on_else_token(tk, this);
}
void
parserstatemachine::on_error(const char * self_name, std::string errmsg)
{

View file

@ -104,7 +104,7 @@ namespace xo {
}
rp<Expression>
progress_xs::assemble_expr() {
progress_xs::assemble_expr(parserstatemachine * p_psm) {
/* need to defer building Apply in case expr is followed by a higher-precedence operator:
* consider input like
* 3.14 + 2.0 * ...
@ -113,10 +113,11 @@ namespace xo {
constexpr const char * c_self_name = "progress_xs::assemble_expr";
if ((op_type_ != optype::invalid) && (rhs_.get() == nullptr)) {
throw std::runtime_error(tostr(c_self_name,
": expected expr on rhs of operator",
xtag("lhs", lhs_),
xtag("op", op_type_)));
std::string errmsg = tostr("expected expression on rhs of operator op",
xtag("lhs", lhs_),
xtag("op", op_type_));
p_psm->on_error(c_self_name, errmsg);
}
/* consecutive expressions not legal, e.g:
@ -220,7 +221,7 @@ namespace xo {
// this->on_semicolon_token(token_type::semicolon(), p_psm);
// INSTEAD, spell out the body
rp<Expression> expr2 = this->assemble_expr();
rp<Expression> expr2 = this->assemble_expr(p_psm);
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
@ -261,10 +262,9 @@ namespace xo {
{
/* note: implementation parallels .on_rightparen_token() */
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(p_psm->debug_flag()));
rp<Expression> expr = this->assemble_expr();
rp<Expression> expr = this->assemble_expr(p_psm);
log && log(xtag("assembled-expr", expr));
@ -316,9 +316,7 @@ namespace xo {
{
/* note: implementation parallels .on_semicolon_token() */
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(p_psm->debug_flag()));
constexpr const char * self_name = "progress_xs::on_rightparen";
@ -336,7 +334,7 @@ namespace xo {
*/
/* right paren confirms stack expression */
rp<Expression> expr = this->assemble_expr();
rp<Expression> expr = this->assemble_expr(p_psm);
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
@ -353,6 +351,49 @@ namespace xo {
p_psm->top_exprstate().on_rightparen_token(tk, p_psm);
}
void
progress_xs::on_then_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
rp<Expression> expr = this->assemble_expr(p_psm);
log && log(xtag("assembled-expr", expr));
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
p_psm->on_expr(expr);
p_psm->on_then_token(tk);
/* control here on input like:
*
* if a > b then..
*
*/
}
void
progress_xs::on_else_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
rp<Expression> expr = this->assemble_expr(p_psm);
log && log(xtag("assembled-expr", expr));
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
p_psm->on_expr(expr);
p_psm->on_else_token(tk);
/* control here on input like:
*
* if a > b then c else..
*/
}
namespace {
optype
tk2op(const tokentype & tktype) {
@ -406,7 +447,7 @@ namespace xo {
*/
/* 1. instantiate expression for *this */
auto expr = this->assemble_expr();
auto expr = this->assemble_expr(p_psm);
/* 2. remove from stack */
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();

View file

@ -4,6 +4,10 @@
namespace xo {
namespace scm {
/** Construct a reader whose parser is created with @p debug_flag. **/
reader::reader(bool debug_flag)
    : parser_(debug_flag)
{
}
void
reader::begin_interactive_session() {
parser_.begin_interactive_session();