From 6ff2ac97b0f4764899ada6fe5416f7313eaaab72 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Wed, 7 Aug 2024 11:52:20 -0400 Subject: [PATCH] xo-reader: feat: handle parenthesized expressions --- include/xo/reader/parser.hpp | 41 +++++- src/reader/parser.cpp | 247 +++++++++++++++++++++++++++++++---- utest/reader.test.cpp | 25 +++- 3 files changed, 278 insertions(+), 35 deletions(-) diff --git a/include/xo/reader/parser.hpp b/include/xo/reader/parser.hpp index 2c26a8e9..715bc23f 100644 --- a/include/xo/reader/parser.hpp +++ b/include/xo/reader/parser.hpp @@ -25,6 +25,12 @@ namespace xo { def_2, def_3, def_4, + def_5, + + /* lparen_0: look for expression; capture + advance to lparen_1 */ + lparen_0, + /* lparen_1: expect rightparen */ + lparen_1, expect_rhs_expression, expect_symbol, @@ -46,6 +52,17 @@ namespace xo { class exprstatestack; +#ifdef NOT_YET + class exprstateaux { + public: + }; + + class lparen_xsa : public exprstateaux { + public: + private: + }; +#endif + /** state associated with a partially-parsed expression. 
**/ class exprstate { @@ -84,6 +101,9 @@ namespace xo { static exprstate def_0(rp def_expr) { return exprstate(exprstatetype::def_0, nullptr, def_expr); } + static exprstate lparen_0() { + return exprstate(exprstatetype::lparen_0, nullptr, nullptr); + } exprstatetype exs_type() const { return exs_type_; } @@ -99,10 +119,10 @@ namespace xo { bool admits_semicolon() const; /** true iff this parsing state admits a singleassign '=' as next token **/ bool admits_singleassign() const; -#ifdef NOT_YET /** true iff this parsing state admits a leftparen '(' as next token **/ bool admits_leftparen() const; -#endif + /** true iff this parsing state admits a rightparen ')' as next token **/ + bool admits_rightparen() const; /** true iff this parsing state admits a 64-bit floating point literal token **/ bool admits_f64() const; @@ -134,10 +154,10 @@ namespace xo { void on_semicolon(exprstatestack * p_stack, rp * p_emit_expr); void on_singleassign(exprstatestack * p_stack); -#ifdef NOT_YET void on_leftparen(exprstatestack * p_stack, rp * p_emit_expr); -#endif + void on_rightparen(exprstatestack * p_stack, + rp * p_emit_expr); void on_f64(const token_type & tk, exprstatestack * p_stack, rp * p_emit_expr); @@ -147,7 +167,7 @@ namespace xo { * def foo : f64 = 1 ; * ^ ^ ^ ^ ^ ^ ^ ^ * | | | | | | | (done) - * | | | | | | ?? 
+ * | | | | | | def_4:expect_rhs_expression:expr_progress * | | | | | def_4:expect_rhs_expression * | | | | def_3 * | | | def_2:expect_type @@ -173,6 +193,11 @@ namespace xo { * May be nested within a def_expr **/ rp cvt_expr_; + +#ifdef NOT_YET + /* polymorphic state here */ + std::unique_ptr state_; +#endif }; /*exprstate*/ inline std::ostream & @@ -220,6 +245,12 @@ namespace xo { std::vector stack_; }; + inline std::ostream & + operator<< (std::ostream & os, const exprstatestack & x) { + x.print(os); + return os; + } + /** schematica parser * * Examples: diff --git a/src/reader/parser.cpp b/src/reader/parser.cpp index b5b7dcd6..a03199ad 100644 --- a/src/reader/parser.cpp +++ b/src/reader/parser.cpp @@ -36,6 +36,12 @@ namespace xo { return "def_3"; case exprstatetype::def_4: return "def_4"; + case exprstatetype::def_5: + return "def_5"; + case exprstatetype::lparen_0: + return "lparen_0"; + case exprstatetype::lparen_1: + return "lparen_1"; case exprstatetype::expect_rhs_expression: return "expect_rhs_expression"; case exprstatetype::expect_symbol: @@ -62,12 +68,15 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: /* note for def_4: * rhs could certainly be a function body that contains * nested defines; but then immediately-enclosing-exprstate * would be a block */ return false; + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: case exprstatetype::expect_rhs_expression: return false; case exprstatetype::expect_symbol: @@ -93,8 +102,11 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: return false; + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: case exprstatetype::expect_rhs_expression: /* treat symbol as variable name */ return true; @@ -131,6 +143,9 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: + 
case exprstatetype::lparen_0: + case exprstatetype::lparen_1: case exprstatetype::expect_rhs_expression: /* rhs-expressions (or expressions for that matter) * may not begin with a colon @@ -160,6 +175,11 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + return false; + case exprstatetype::def_5: + return true; + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: case exprstatetype::expect_rhs_expression: case exprstatetype::expect_symbol: case exprstatetype::expect_type: @@ -206,7 +226,10 @@ namespace xo { return true; case exprstatetype::def_4: + case exprstatetype::def_5: + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: case exprstatetype::expect_rhs_expression: /* rhs-expressions (or expressions for that matter) * may not begin with singleassign '=' @@ -236,6 +259,13 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: + return false; + + case exprstatetype::lparen_0: + return true; + + case exprstatetype::lparen_1: return false; case exprstatetype::expect_rhs_expression: @@ -257,7 +287,6 @@ namespace xo { return false; } -#ifdef NOT_YET bool exprstate::admits_leftparen() const { switch (exs_type_) { @@ -278,6 +307,7 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: /* input like * def foo : f64 = ( * ^ ^ ^ ^ ^ @@ -291,6 +321,12 @@ namespace xo { */ return false; + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: + /* unreachable */ + assert(false); + return false; + case exprstatetype::expect_rhs_expression: /* can always begin non-toplevel expression with '(' */ return true; @@ -301,6 +337,53 @@ namespace xo { case exprstatetype::expect_symbol: return false; + case exprstatetype::expr_progress: + /* todo: will parse as function call */ + return false; + + case exprstatetype::invalid: + case exprstatetype::n_exprstatetype: 
+ /* unreachable */ + assert(false); + return false; + } + + return false; + } + + bool + exprstate::admits_rightparen() const { + switch (exs_type_) { + case exprstatetype::expect_toplevel_expression_sequence: + case exprstatetype::def_0: + case exprstatetype::def_1: + case exprstatetype::def_2: + case exprstatetype::def_3: + case exprstatetype::def_4: + case exprstatetype::def_5: + return false; + + case exprstatetype::lparen_0: + /* unreachable -- will have pushed expect_rhs_expression */ + assert(false); + return false; + + case exprstatetype::lparen_1: + return true; + + case exprstatetype::expect_rhs_expression: + return false; + + case exprstatetype::expect_type: + return false; + + case exprstatetype::expect_symbol: + return false; + + case exprstatetype::expr_progress: + /* satisfies expression form */ + return true; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -310,7 +393,6 @@ namespace xo { return false; } -#endif void exprstate::on_def(exprstatestack * p_stack) { @@ -375,10 +457,22 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: /* unreachable */ assert(false); return; + case exprstatetype::lparen_0: + /* todo: variable reference */ + assert(false); + break; + + case exprstatetype::lparen_1: + /* unreachable */ + + assert(false); + break; + case exprstatetype::expect_rhs_expression: { /* various possibilities when looking for rhs expression: @@ -468,6 +562,8 @@ namespace xo { exprstatestack * /*p_stack*/, rp * /*p_emit_expr*/) { + /* returning type description to something that wants it */ + switch (this->exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: case exprstatetype::def_0: @@ -487,10 +583,16 @@ namespace xo { case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: /* NOT IMPLEMENTED */ assert(false); return; + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: + 
assert(false); + return; + case exprstatetype::expect_rhs_expression: case exprstatetype::expect_type: case exprstatetype::expect_symbol: @@ -557,6 +659,30 @@ namespace xo { p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */ + p_stack->top_exprstate().on_expr(expr, + p_stack, + p_emit_expr); + /* control here on input like: + * (1.234; + * + * a. '(' sets up stack [lparen_0:expect_rhs_expression] + * (see exprstate::on_leftparen()) + * b. 1.234 pushes (in case operators) [lparen_0:expect_rhs_expression:expr_progress] + * (see exprstate::on_f64()) + * c. semicolon completes expr_progress [lparen_0:expect_rhs_expression] + * deliver expression to expect_rhs_expression.on_expr() + * (see exprstate::on_expr()) + * d. expect_rhs_expression forwards expression to [lparen_0] + * e. lparen_0 advances to [lparen_1] + * f. now deliver semicolon; [lparen_1] rejects + */ + + p_stack->top_exprstate().on_semicolon(p_stack, p_emit_expr); + } else if (this->exs_type_ == exprstatetype::def_5) { + rp expr = this->def_expr_; + + p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */ + p_stack->top_exprstate().on_expr(expr, p_stack, p_emit_expr); @@ -590,25 +716,15 @@ namespace xo { } } -#ifdef OBSOLETE - /** - consider input: - - x := y(foo()) - - Is that an assignment x:=y followed by function call (foo()) ? 
- Or assignment with rhs calling a function y() with argument foo() - - policy: forbid parenthesis as beginning of a toplevel expression - **/ - void exprstate::on_leftparen(exprstatestack * p_stack, - rp * p_emit_expr) + rp * /*p_emit_expr*/) { constexpr bool c_debug_flag = true; scope log(XO_DEBUG(c_debug_flag)); + constexpr const char * self_name = "exprstate::on_leftparen"; + if (!this->admits_leftparen()) { throw std::runtime_error(tostr(self_name, @@ -617,13 +733,64 @@ namespace xo { } if (this->exs_type_ == exprstatetype::expect_rhs_expression) { - /* push lparen_0 to remember to look for subsequent rightparen */ - p_stack->push_exprstate(exprstatetype::lparen_0); - - p_stack->push_exprstate(exprstatetype::expect_rhs_expression); + /* push lparen_0 to remember to look for subsequent rightparen. */ + p_stack->push_exprstate(exprstate::lparen_0()); + p_stack->push_exprstate(exprstate::expect_rhs_expression()); + } + } + + void + exprstate::on_rightparen(exprstatestack * p_stack, + rp * p_emit_expr) + { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag)); + + constexpr const char * self_name = "exprstate::on_rightparen"; + + if (!this->admits_rightparen()) + { + throw std::runtime_error(tostr(self_name, + ": unexpected rightparen ')' for parsing state", + xtag("state", *this))); + } + + if (this->exs_type_ == exprstatetype::expr_progress) { + /* stack may be something like: + * + * lparen_0 + * expect_rhs_expression + * expr_progress + * <-- rightparen + * + * 1. rightparen completes expression-in-progress + * 2. rightparen must then match innermost waiting lparen_0 + */ + + /* right paren confirms stack expression */ + rp expr = this->gen_expr_; + + p_stack->pop_exprstate(); /* NOT KOSHER. 
invalidates *this */ + + if (p_stack->empty()) { + throw std::runtime_error(tostr(self_name, + ": expected non-empty parsing stack")); + } + + log && log(xtag("stack", *p_stack)); + + p_stack->top_exprstate().on_expr(expr, p_stack, p_emit_expr); + + /* now deliver rightparen */ + p_stack->top_exprstate().on_rightparen(p_stack, p_emit_expr); + } else if (this->exs_type_ == exprstatetype::lparen_1) { + rp expr = this->gen_expr_; + + p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */ + + p_stack->top_exprstate().on_expr(expr, p_stack, p_emit_expr); } } -#endif void exprstate::on_f64(const token_type & tk, @@ -643,6 +810,10 @@ namespace xo { } if (this->exs_type_ == exprstatetype::expect_rhs_expression) { + /* e.g. + * def pi = 3.14159265; + * \---tk---/ + */ p_stack->push_exprstate (exprstate::make_expr_progress (Constant::make(tk.f64_value()))); @@ -684,8 +855,13 @@ namespace xo { return; case tokentype::tk_leftparen: + this->on_leftparen(p_stack, p_emit_expr); + return; case tokentype::tk_rightparen: + this->on_rightparen(p_stack, p_emit_expr); + return; + case tokentype::tk_leftbracket: case tokentype::tk_rightbracket: case tokentype::tk_leftbrace: @@ -772,7 +948,7 @@ namespace xo { * Need to be able to locate variable by type * 2. if ir_type is an expression, adopt as rhs */ - rp rhs_value = expr.get(); + rp rhs_value = expr.promote(); if (this->cvt_expr_) this->cvt_expr_->assign_arg(rhs_value); @@ -781,11 +957,25 @@ namespace xo { rp def_expr = this->def_expr_; - p_stack->pop_exprstate(); /* NOT KOSHER. 
invalidates *this */ + this->exs_type_ = exprstatetype::def_5; + return; + } - p_stack->top_exprstate().on_expr(def_expr, - p_stack, - p_emit_expr); + case exprstatetype::def_5: + assert(false); + return; + + case exprstatetype::lparen_0: { + this->exs_type_ = exprstatetype::lparen_1; /* wants on_rightparen */ + p_stack->push_exprstate(exprstate::make_expr_progress(expr.promote())); + + return; + } + + case exprstatetype::lparen_1: { + this->gen_expr_ = expr.promote(); + + /* expect immediate incoming call, this time to on_rightparen() */ return; } @@ -846,6 +1036,13 @@ namespace xo { case exprstatetype::def_2: case exprstatetype::def_3: case exprstatetype::def_4: + case exprstatetype::def_5: + /* NOT IMPLEMENTED */ + assert(false); + return; + + case exprstatetype::lparen_0: + case exprstatetype::lparen_1: /* NOT IMPLEMENTED */ assert(false); return; diff --git a/utest/reader.test.cpp b/utest/reader.test.cpp index eff43622..8aff0011 100644 --- a/utest/reader.test.cpp +++ b/utest/reader.test.cpp @@ -7,19 +7,34 @@ namespace xo { using xo::scm::reader; namespace ut { + namespace { + struct test_case { + const char * text_; + }; + + std::vector s_testcase_v = { + {"def foo : f64 = 3.14159265;"}, + {"def foo : f64 = (3.14159265);"} + }; + } + TEST_CASE("reader", "[reader]") { - for (std::size_t i_tc = 0; i_tc < 1; ++i_tc) { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag), xtag("utest", "reader")); + + for (std::size_t i_tc = 0; i_tc < s_testcase_v.size(); ++i_tc) { + const test_case & tc = s_testcase_v[i_tc]; + reader rdr; - constexpr bool c_debug_flag = true; - scope log(XO_DEBUG(c_debug_flag), - xtag("utest", "reader"), xtag("i_tc", i_tc)); + scope log(XO_ENTER2(always, c_debug_flag, "reader.testcase"), + xtag("i_tc", i_tc)); rdr.begin_translation_unit(); try { auto input - = reader::span_type::from_cstr("def foo : f64 = 3.14159265;"); + = reader::span_type::from_cstr(tc.text_); auto rr = rdr.read_expr(input, true /*eof*/);