xo-reader: feat: handle parenthesized expressions

This commit is contained in:
Roland Conybeare 2024-08-07 11:52:20 -04:00
commit 6ff2ac97b0
3 changed files with 277 additions and 34 deletions

View file

@ -25,6 +25,12 @@ namespace xo {
def_2,
def_3,
def_4,
def_5,
/* lparen_0: look for expression; capture + advance to lparen_1 */
lparen_0,
/* lparen_1: expect rightparen */
lparen_1,
expect_rhs_expression,
expect_symbol,
@ -46,6 +52,17 @@ namespace xo {
class exprstatestack;
#ifdef NOT_YET
/** base class for auxiliary, state-specific parser data.
 *
 *  Intended to be held through a base-class pointer
 *  (std::unique_ptr<exprstateaux>), so the destructor must be virtual:
 *  otherwise destroying a derived object through that pointer
 *  is undefined behavior.
 **/
class exprstateaux {
public:
    virtual ~exprstateaux() = default;
};
/** auxiliary state derived from exprstateaux.
 *  Placeholder: no members yet.
 *  NOTE(review): name suggests it belongs to the lparen_0/lparen_1 states -- confirm.
 **/
class lparen_xsa : public exprstateaux {
public:
private:
};
#endif
/** state associated with a partially-parsed expression.
**/
class exprstate {
@ -84,6 +101,9 @@ namespace xo {
/** named constructor: parsing state with type exprstatetype::def_0.
 *  @p def_expr is passed as the third constructor argument;
 *  the second constructor argument is nullptr.
 **/
static exprstate def_0(rp<DefineExprAccess> def_expr) {
return exprstate(exprstatetype::def_0, nullptr, def_expr);
}
/** named constructor: parsing state with type exprstatetype::lparen_0.
 *  Carries no expression: both remaining constructor arguments are nullptr.
 **/
static exprstate lparen_0() {
return exprstate(exprstatetype::lparen_0, nullptr, nullptr);
}
/** state type (value of exs_type_) for this parsing state **/
exprstatetype exs_type() const { return exs_type_; }
@ -99,10 +119,10 @@ namespace xo {
bool admits_semicolon() const;
/** true iff this parsing state admits a singleassign '=' as next token **/
bool admits_singleassign() const;
#ifdef NOT_YET
/** true iff this parsing state admits a leftparen '(' as next token **/
bool admits_leftparen() const;
#endif
/** true iff this parsing state admits a rightparen ')' as next token **/
bool admits_rightparen() const;
/** true iff this parsing state admits a 64-bit floating point literal token **/
bool admits_f64() const;
@ -134,10 +154,10 @@ namespace xo {
void on_semicolon(exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
void on_singleassign(exprstatestack * p_stack);
#ifdef NOT_YET
void on_leftparen(exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
#endif
void on_rightparen(exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
void on_f64(const token_type & tk,
exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
@ -147,7 +167,7 @@ namespace xo {
* def foo : f64 = 1 ;
* ^ ^ ^ ^ ^ ^ ^ ^
* | | | | | | | (done)
* | | | | | | ??
* | | | | | | def_4:expect_rhs_expression:expr_progress
* | | | | | def_4:expect_rhs_expression
* | | | | def_3
* | | | def_2:expect_type
@ -173,6 +193,11 @@ namespace xo {
* May be nested within a def_expr
**/
rp<ConvertExprAccess> cvt_expr_;
#ifdef NOT_YET
/* polymorphic state here */
std::unique_ptr<exprstateaux> state_;
#endif
}; /*exprstate*/
inline std::ostream &
@ -220,6 +245,12 @@ namespace xo {
std::vector<exprstate> stack_;
};
/** stream inserter for exprstatestack:
 *  delegates to x.print(os), then returns @p os so insertions can be chained.
 **/
inline std::ostream &
operator<< (std::ostream & os, const exprstatestack & x) {
x.print(os);
return os;
}
/** schematica parser
*
* Examples:

View file

@ -36,6 +36,12 @@ namespace xo {
return "def_3";
case exprstatetype::def_4:
return "def_4";
case exprstatetype::def_5:
return "def_5";
case exprstatetype::lparen_0:
return "lparen_0";
case exprstatetype::lparen_1:
return "lparen_1";
case exprstatetype::expect_rhs_expression:
return "expect_rhs_expression";
case exprstatetype::expect_symbol:
@ -62,12 +68,15 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
/* note for def_4:
* rhs could certainly be a function body that contains
* nested defines; but then immediately-enclosing-exprstate
* would be a block
*/
return false;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
case exprstatetype::expect_rhs_expression:
return false;
case exprstatetype::expect_symbol:
@ -93,8 +102,11 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
return false;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
case exprstatetype::expect_rhs_expression:
/* treat symbol as variable name */
return true;
@ -131,6 +143,9 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
case exprstatetype::expect_rhs_expression:
/* rhs-expressions (or expressions for that matter)
* may not begin with a colon
@ -160,6 +175,11 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
return false;
case exprstatetype::def_5:
return true;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
case exprstatetype::expect_rhs_expression:
case exprstatetype::expect_symbol:
case exprstatetype::expect_type:
@ -206,7 +226,10 @@ namespace xo {
return true;
case exprstatetype::def_4:
case exprstatetype::def_5:
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
case exprstatetype::expect_rhs_expression:
/* rhs-expressions (or expressions for that matter)
* may not begin with singleassign '='
@ -236,6 +259,13 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
return false;
case exprstatetype::lparen_0:
return true;
case exprstatetype::lparen_1:
return false;
case exprstatetype::expect_rhs_expression:
@ -257,7 +287,6 @@ namespace xo {
return false;
}
#ifdef NOT_YET
bool
exprstate::admits_leftparen() const {
switch (exs_type_) {
@ -278,6 +307,7 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
/* input like
* def foo : f64 = (
* ^ ^ ^ ^ ^
@ -291,6 +321,12 @@ namespace xo {
*/
return false;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
/* unreachable */
assert(false);
return false;
case exprstatetype::expect_rhs_expression:
/* can always begin non-toplevel expression with '(' */
return true;
@ -301,6 +337,53 @@ namespace xo {
case exprstatetype::expect_symbol:
return false;
case exprstatetype::expr_progress:
/* todo: will parse as function call */
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
assert(false);
return false;
}
return false;
}
bool
exprstate::admits_rightparen() const {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
case exprstatetype::def_1:
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
return false;
case exprstatetype::lparen_0:
/* unreachable -- will have pushed expect_rhs_expression */
assert(false);
return false;
case exprstatetype::lparen_1:
return true;
case exprstatetype::expect_rhs_expression:
return false;
case exprstatetype::expect_type:
return false;
case exprstatetype::expect_symbol:
return false;
case exprstatetype::expr_progress:
/* satisfies expression form */
return true;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -310,7 +393,6 @@ namespace xo {
return false;
}
#endif
void
exprstate::on_def(exprstatestack * p_stack) {
@ -375,10 +457,22 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
/* unreachable */
assert(false);
return;
case exprstatetype::lparen_0:
/* todo: variable reference */
assert(false);
break;
case exprstatetype::lparen_1:
/* unreachable */
assert(false);
break;
case exprstatetype::expect_rhs_expression:
{
/* various possibilities when looking for rhs expression:
@ -468,6 +562,8 @@ namespace xo {
exprstatestack * /*p_stack*/,
rp<Expression> * /*p_emit_expr*/)
{
/* returning type description to something that wants it */
switch (this->exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
@ -487,10 +583,16 @@ namespace xo {
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
/* NOT IMPLEMENTED */
assert(false);
return;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
assert(false);
return;
case exprstatetype::expect_rhs_expression:
case exprstatetype::expect_type:
case exprstatetype::expect_symbol:
@ -557,6 +659,30 @@ namespace xo {
p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */
p_stack->top_exprstate().on_expr(expr,
p_stack,
p_emit_expr);
/* control here on input like:
* (1.234;
*
* a. '(' sets up stack [lparen_0:expect_rhs_expression]
* (see exprstate::on_leftparen())
* b. 1.234 pushes (in case operators) [lparen_0:expect_rhs_expression:expr_progress]
* (see exprstate::on_f64())
* c. semicolon completes expr_progress [lparen_0:expect_rhs_expression]
* deliver expression to expect_rhs_expression.on_expr()
* (see exprstate::on_expr())
* d. expect_rhs_expression forwards expression to [lparen_0]
* e. lparen_0 advances to [lparen_1]
* f. now deliver semicolon; [lparen_1] rejects
*/
p_stack->top_exprstate().on_semicolon(p_stack, p_emit_expr);
} else if (this->exs_type_ == exprstatetype::def_5) {
rp<Expression> expr = this->def_expr_;
p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */
p_stack->top_exprstate().on_expr(expr,
p_stack,
p_emit_expr);
@ -590,25 +716,15 @@ namespace xo {
}
}
#ifdef OBSOLETE
/**
consider input:
x := y(foo())
Is that an assignment x:=y followed by function call (foo()) ?
Or assignment with rhs calling a function y() with argument foo()
policy: forbid parenthesis as beginning of a toplevel expression
**/
void
exprstate::on_leftparen(exprstatestack * p_stack,
rp<Expression> * p_emit_expr)
rp<Expression> * /*p_emit_expr*/)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
constexpr const char * self_name = "exprstate::on_leftparen";
if (!this->admits_leftparen())
{
throw std::runtime_error(tostr(self_name,
@ -617,13 +733,64 @@ namespace xo {
}
if (this->exs_type_ == exprstatetype::expect_rhs_expression) {
/* push lparen_0 to remember to look for subsequent rightparen */
p_stack->push_exprstate(exprstatetype::lparen_0);
p_stack->push_exprstate(exprstatetype::expect_rhs_expression);
/* push lparen_0 to remember to look for subsequent rightparen. */
p_stack->push_exprstate(exprstate::lparen_0());
p_stack->push_exprstate(exprstate::expect_rhs_expression());
}
}
/** handle a rightparen ')' token delivered to this parsing state.
 *
 *  Behavior by state type:
 *  - expr_progress: the rightparen completes the expression in progress.
 *    Pop this state, deliver the expression to the new top-of-stack
 *    via on_expr(), then re-deliver the rightparen to whichever state
 *    is on top afterwards.
 *  - lparen_1: the rightparen closes the parenthesized expression.
 *    Pop this state and deliver the captured expression to the
 *    new top-of-stack via on_expr().
 *
 *  @param p_stack      parsing stack; this state is popped from it
 *  @param p_emit_expr  passed through unchanged to on_expr() / on_rightparen()
 *
 *  @throws std::runtime_error if this state does not admit a rightparen,
 *          or if the stack is empty after popping this state.
 **/
void
exprstate::on_rightparen(exprstatestack * p_stack,
                         rp<Expression> * p_emit_expr)
{
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_rightparen";

    if (!this->admits_rightparen())
    {
        throw std::runtime_error(tostr(self_name,
                                       ": unexpected rightparen ')' for parsing state",
                                       xtag("state", *this)));
    }

    if (this->exs_type_ == exprstatetype::expr_progress) {
        /* stack may be something like:
         *
         *   lparen_0
         *   expect_rhs_expression
         *   expr_progress
         *                   <-- rightparen
         *
         * 1. rightparen completes expression-in-progress
         * 2. rightparen must then match innermost waiting lparen_0
         */

        /* right paren confirms stack expression.
         * copy before popping: the pop invalidates *this
         */
        rp<Expression> expr = this->gen_expr_;

        p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */

        if (p_stack->empty()) {
            throw std::runtime_error(tostr(self_name,
                                           ": expected non-empty parsing stack"));
        }

        log && log(xtag("stack", *p_stack));

        p_stack->top_exprstate().on_expr(expr, p_stack, p_emit_expr);

        /* now deliver rightparen */
        p_stack->top_exprstate().on_rightparen(p_stack, p_emit_expr);
    } else if (this->exs_type_ == exprstatetype::lparen_1) {
        /* copy before popping: the pop invalidates *this */
        rp<Expression> expr = this->gen_expr_;

        p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */

        /* same guard as the expr_progress branch:
         * never ask an empty stack for its top
         */
        if (p_stack->empty()) {
            throw std::runtime_error(tostr(self_name,
                                           ": expected non-empty parsing stack"));
        }

        p_stack->top_exprstate().on_expr(expr, p_stack, p_emit_expr);
    }
}
#endif
void
exprstate::on_f64(const token_type & tk,
@ -643,6 +810,10 @@ namespace xo {
}
if (this->exs_type_ == exprstatetype::expect_rhs_expression) {
/* e.g.
* def pi = 3.14159265;
* \---tk---/
*/
p_stack->push_exprstate
(exprstate::make_expr_progress
(Constant<double>::make(tk.f64_value())));
@ -684,8 +855,13 @@ namespace xo {
return;
case tokentype::tk_leftparen:
this->on_leftparen(p_stack, p_emit_expr);
return;
case tokentype::tk_rightparen:
this->on_rightparen(p_stack, p_emit_expr);
return;
case tokentype::tk_leftbracket:
case tokentype::tk_rightbracket:
case tokentype::tk_leftbrace:
@ -772,7 +948,7 @@ namespace xo {
* Need to be able to locate variable by type
* 2. if ir_type is an expression, adopt as rhs
*/
rp<Expression> rhs_value = expr.get();
rp<Expression> rhs_value = expr.promote();
if (this->cvt_expr_)
this->cvt_expr_->assign_arg(rhs_value);
@ -781,11 +957,25 @@ namespace xo {
rp<Expression> def_expr = this->def_expr_;
p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */
this->exs_type_ = exprstatetype::def_5;
return;
}
p_stack->top_exprstate().on_expr(def_expr,
p_stack,
p_emit_expr);
case exprstatetype::def_5:
assert(false);
return;
case exprstatetype::lparen_0: {
this->exs_type_ = exprstatetype::lparen_1; /* wants on_rightparen */
p_stack->push_exprstate(exprstate::make_expr_progress(expr.promote()));
return;
}
case exprstatetype::lparen_1: {
this->gen_expr_ = expr.promote();
/* expect immediate incoming call, this time to on_rightparen() */
return;
}
@ -846,6 +1036,13 @@ namespace xo {
case exprstatetype::def_2:
case exprstatetype::def_3:
case exprstatetype::def_4:
case exprstatetype::def_5:
/* NOT IMPLEMENTED */
assert(false);
return;
case exprstatetype::lparen_0:
case exprstatetype::lparen_1:
/* NOT IMPLEMENTED */
assert(false);
return;

View file

@ -7,19 +7,34 @@ namespace xo {
using xo::scm::reader;
namespace ut {
namespace {
/** one entry in the reader test table **/
struct test_case {
/* source text handed to the reader (via reader::span_type::from_cstr) */
const char * text_;
};
/* table of inputs for the "reader" test; indexed by i_tc */
std::vector<test_case> s_testcase_v = {
/* definition whose rhs is a bare floating-point literal */
{"def foo : f64 = 3.14159265;"},
/* same definition, with the rhs literal wrapped in parentheses */
{"def foo : f64 = (3.14159265);"}
};
}
TEST_CASE("reader", "[reader]") {
for (std::size_t i_tc = 0; i_tc < 1; ++i_tc) {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag), xtag("utest", "reader"));
for (std::size_t i_tc = 0; i_tc < s_testcase_v.size(); ++i_tc) {
const test_case & tc = s_testcase_v[i_tc];
reader rdr;
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag),
xtag("utest", "reader"), xtag("i_tc", i_tc));
scope log(XO_ENTER2(always, c_debug_flag, "reader.testcase"),
xtag("i_tc", i_tc));
rdr.begin_translation_unit();
try {
auto input
= reader::span_type::from_cstr("def foo : f64 = 3.14159265;");
= reader::span_type::from_cstr(tc.text_);
auto rr
= rdr.read_expr(input, true /*eof*/);