diff --git a/include/xo/reader/parser.hpp b/include/xo/reader/parser.hpp index 5888c9f9..2c26a8e9 100644 --- a/include/xo/reader/parser.hpp +++ b/include/xo/reader/parser.hpp @@ -30,6 +30,8 @@ namespace xo { expect_symbol, expect_type, + expr_progress, + n_exprstatetype }; @@ -58,18 +60,29 @@ namespace xo { public: exprstate() = default; exprstate(exprstatetype exs_type, - rp def_expr = nullptr) + rp candidate_expr, + rp def_expr) : exs_type_{exs_type}, + gen_expr_{std::move(candidate_expr)}, def_expr_{std::move(def_expr)} {} static exprstate expect_toplevel_expression_sequence() { - return exprstate(exprstatetype::expect_toplevel_expression_sequence); + return exprstate(exprstatetype::expect_toplevel_expression_sequence, nullptr, nullptr); } - static exprstate def_0() { - return exprstate(exprstatetype::def_0); + static exprstate expect_rhs_expression() { + return exprstate(exprstatetype::expect_rhs_expression, nullptr, nullptr); } static exprstate expect_symbol() { - return exprstate(exprstatetype::expect_symbol); + return exprstate(exprstatetype::expect_symbol, nullptr, nullptr); + } + static exprstate expect_type() { + return exprstate(exprstatetype::expect_type, nullptr, nullptr); + } + static exprstate make_expr_progress(rp expr) { + return exprstate(exprstatetype::expr_progress, expr, nullptr); + } + static exprstate def_0(rp def_expr) { + return exprstate(exprstatetype::def_0, nullptr, def_expr); } exprstatetype exs_type() const { return exs_type_; } @@ -82,8 +95,14 @@ namespace xo { bool admits_symbol() const; /** true iff this parsing state admits a colon as next token **/ bool admits_colon() const; + /** true iff this parsing state admits a semicolon as next token **/ + bool admits_semicolon() const; /** true iff this parsing state admits a singleassign '=' as next token **/ bool admits_singleassign() const; +#ifdef NOT_YET + /** true iff this parsing state admits a leftparen '(' as next token **/ + bool admits_leftparen() const; +#endif /** true iff
this parsing state admits a 64-bit floating point literal token **/ bool admits_f64() const; @@ -112,16 +131,23 @@ namespace xo { exprstatestack * p_stack, rp * p_emit_expr); void on_colon(exprstatestack * p_stack); + void on_semicolon(exprstatestack * p_stack, + rp * p_emit_expr); void on_singleassign(exprstatestack * p_stack); +#ifdef NOT_YET + void on_leftparen(exprstatestack * p_stack, + rp * p_emit_expr); +#endif void on_f64(const token_type & tk, exprstatestack * p_stack, rp * p_emit_expr); private: /** - * def foo : f64 = 1 - * ^ ^ ^ ^ ^ ^ ^ - * | | | | | | (done) + * def foo : f64 = 1 ; + * ^ ^ ^ ^ ^ ^ ^ ^ + * | | | | | | | (done) + * | | | | | | ?? * | | | | | def_4:expect_rhs_expression * | | | | def_3 * | | | def_2:expect_type @@ -139,6 +165,8 @@ namespace xo { **/ exprstatetype exs_type_; + /** candidate expression carried by state expr_progress; passed on by on_semicolon() **/ + rp gen_expr_; /** scaffold a define-expression here **/ rp def_expr_; /** scafford a convert-expression here. @@ -199,10 +227,10 @@ namespace xo { * decltype point * * // forward declarations - * decl pi : f64 - * decl fib(n : i32) -> i32 + * decl pi : f64; + * decl fib(n : i32) -> i32; * - * def pi = 3.14159265 // constant. = is single assignment + * def pi = 3.14159265; // constant. = is single assignment * * def fib(n : i32) -> i32 { * // nested defs ok @@ -211,33 +239,37 @@ namespace xo { * // (n == 0) ? s1 : aux(n - 1, s1 + s2, s1) * // * if (n == 0) { - * s1 + * s1; * } else { - * aux(n - 1, s1 + s2, s1) + * aux(n - 1, s1 + s2, s1); * } * * // or: * // if (n == 0) ?
s1 : aux(n - 1, s1 + s2, s1) * } * - * aux(n=n, s1=1, s2=0) + * aux(n=n, s1=1, s2=0); * } * - * def anotherfib = lambda(n : i32) { fib(n) } + * def x := "fu"; // non-constant + * x += "bar"; * - * def any : object - * def l : list = '() + * def anotherfib = lambda(n : i32) { fib(n) }; * - * deftype point :: {x : f64, y : f64} - * deftype polar :: {arg : f64, mag : f64} + * def any : object; + * def l : list = '(); + * + * deftype point :: {x : f64, y : f64}; + * deftype polar :: {arg : f64, mag : f64}; * * def polar2rect(pt : polar) -> point { * point(x = pt.mag * cos(arg), - * y = pt.mag * sin(arg)) + * y = pt.mag * sin(arg)); * } * * Grammar: - * toplevel-program = expression* + * toplevel-program = $expression(1); ..; $expression(n) + * * type-decl = decltype $typename [<$tp1 .. $tpn>] * expression = define-expr * | literal-expr @@ -245,6 +277,7 @@ namespace xo { * | apply-expr * | if-expr * | lambda-expr + * | arithmetic-expr * | block * * define-expr = type-decl @@ -297,6 +330,23 @@ namespace xo { * .., * $paramname(n) : $type(n)) body-expr * body-expr = expression + * + * arithmetic-expr = expression binop expression + * + * binop = + + * | - + * | * + * | / + * | | + * | & + * | ^ + * | == + * | != + * | < + * | <= + * | >= + * | > + * **/ class parser { public: diff --git a/include/xo/reader/reader.hpp b/include/xo/reader/reader.hpp index 4009f722..28e0b3a6 100644 --- a/include/xo/reader/reader.hpp +++ b/include/xo/reader/reader.hpp @@ -18,6 +18,9 @@ namespace xo { using Expression = xo::ast::Expression; using span_type = span; + reader_result(rp expr, span_type rem) + : expr_{std::move(expr)}, rem_{rem} {} + /** parsed schematica expression **/ rp expr_; /** span giving text input consumed to construct expr, diff --git a/src/reader/parser.cpp b/src/reader/parser.cpp index b4a9eaf9..b5b7dcd6 100644 --- a/src/reader/parser.cpp +++ b/src/reader/parser.cpp @@ -21,7 +21,7 @@ namespace xo { namespace scm { const char * exprstatetype_descr(exprstatetype x) { -
switch(x) { + switch (x) { case exprstatetype::invalid: return "?invalid"; case exprstatetype::expect_toplevel_expression_sequence: @@ -42,6 +42,8 @@ namespace xo { return "expect_symbol"; case exprstatetype::expect_type: return "expect_type"; + case exprstatetype::expr_progress: + return "expr_progress"; case exprstatetype::n_exprstatetype: break; } @@ -51,7 +53,7 @@ namespace xo { bool exprstate::admits_definition() const { - switch(exs_type_) { + switch (exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: return true; @@ -71,6 +73,8 @@ namespace xo { case exprstatetype::expect_symbol: case exprstatetype::expect_type: return false; + case exprstatetype::expr_progress: + return false; case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -82,7 +86,7 @@ namespace xo { bool exprstate::admits_symbol() const { - switch(exs_type_) { + switch (exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: case exprstatetype::def_0: case exprstatetype::def_1: @@ -102,6 +106,9 @@ namespace xo { /* treat symbol as typename */ return true; + case exprstatetype::expr_progress: + return false; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -113,7 +120,7 @@ namespace xo { bool exprstate::admits_colon() const { - switch(exs_type_) { + switch (exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: case exprstatetype::def_0: return false; @@ -132,6 +139,9 @@ namespace xo { case exprstatetype::expect_type: return false; + case exprstatetype::expr_progress: + return false; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -141,14 +151,37 @@ namespace xo { return false; } + bool + exprstate::admits_semicolon() const { + switch (exs_type_) { + case exprstatetype::expect_toplevel_expression_sequence: + case exprstatetype::def_0: + case exprstatetype::def_1: + case exprstatetype::def_2: + case exprstatetype::def_3: + case
exprstatetype::def_4: + case exprstatetype::expect_rhs_expression: + case exprstatetype::expect_symbol: + case exprstatetype::expect_type: + return false; + case exprstatetype::expr_progress: + return true; + case exprstatetype::invalid: + case exprstatetype::n_exprstatetype: + return false; + } + + return false; + } + bool exprstate::admits_singleassign() const { - switch(exs_type_) { + switch (exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: /* - * def foo = 1 - * def foo : f64 = 1 + * def foo = 1 ; + * def foo : f64 = 1 ; * ^ ^ ^ ^ ^ ^ ^ * | | | | | | (done) * | | | | | def_4:expect_rhs_expression @@ -182,6 +215,9 @@ namespace xo { case exprstatetype::expect_type: return false; + case exprstatetype::expr_progress: + return false; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -193,7 +229,7 @@ namespace xo { bool exprstate::admits_f64() const { - switch(exs_type_) { + switch (exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: case exprstatetype::def_0: case exprstatetype::def_1: @@ -209,6 +245,9 @@ namespace xo { case exprstatetype::expect_type: return false; + case exprstatetype::expr_progress: + return false; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -218,6 +257,61 @@ namespace xo { return false; } +#ifdef NOT_YET + bool + exprstate::admits_leftparen() const { + switch (exs_type_) { + case exprstatetype::expect_toplevel_expression_sequence: + /* input like + * (function(blah...)) + * not allowed at toplevel; + * creates ambiguity e.g.
consider + * x := foo + * (bar) + * + * is rhs 'foo' or 'foo(bar)' + */ + return false; + + case exprstatetype::def_0: + case exprstatetype::def_1: + case exprstatetype::def_2: + case exprstatetype::def_3: + case exprstatetype::def_4: + /* input like + * def foo : f64 = ( + * ^ ^ ^ ^ ^ + * | | | | def_4 + * | | | def_3 + * | | def_2 + * | def_1 + * def_0 + * + * not allowed or relies on pushing another state + */ + return false; + + case exprstatetype::expect_rhs_expression: + /* can always begin non-toplevel expression with '(' */ + return true; + + case exprstatetype::expect_type: + return false; + + case exprstatetype::expect_symbol: + return false; + + case exprstatetype::invalid: + case exprstatetype::n_exprstatetype: + /* unreachable */ + assert(false); + return false; + } + + return false; + } +#endif + void exprstate::on_def(exprstatestack * p_stack) { constexpr bool c_debug_flag = true; @@ -233,13 +327,13 @@ namespace xo { xtag("state", *this))); } - p_stack->push_exprstate(exprstate(exprstatetype::def_0, - DefineExprAccess::make_empty())); + p_stack->push_exprstate + (exprstate::def_0(DefineExprAccess::make_empty())); /* todo: replace: * expect_symbol_or_function_signature() */ - p_stack->push_exprstate(exprstatetype::expect_symbol); + p_stack->push_exprstate(exprstate::expect_symbol()); /* keyword 'def' introduces a definition: * def pi : f64 = 3.14159265 @@ -255,6 +349,8 @@ namespace xo { constexpr bool c_debug_flag = true; scope log(XO_DEBUG(c_debug_flag)); + log && log(xtag("exstype", p_stack->top_exprstate().exs_type())); + constexpr const char * self_name = "exprstate::on_symbol"; if (!this->admits_symbol()) { @@ -265,7 +361,7 @@ namespace xo { xtag("state", *this))); } - switch(this->exs_type_) { + switch (this->exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: throw std::runtime_error (tostr(self_name, @@ -284,13 +380,44 @@ namespace xo { return; case exprstatetype::expect_rhs_expression: - case exprstatetype::expect_symbol: -
/* have to do pop first */ + { + /* various possibilities when looking for rhs expression: + * + * x := y // (1) + * x := f(a) // (2) + * x := f(a,b) // (3) + * + * need lookahead token following symbol to distinguish + * between (1) (symbol completes rhs expression) + * and {(2), (3)} (symbol is function call) + */ + /* have to do pop first, before sending symbol to + * the o.g. symbol-requester + */ +#ifdef NOT_YET + p_stack->push_exprstate(exprstate(exprstatetype::expr_progress, + Variable::make(name, type))); +#endif + +#ifdef LATER + p_stack->pop_exprstate(); + p_stack->top_exprstate().on_symbol(tk.text(), + p_stack, p_emit_expr); +#endif + return; + } + + case exprstatetype::expect_symbol: + { + /* have to do pop first, before sending symbol to + * the o.g. symbol-requester + */ p_stack->pop_exprstate(); p_stack->top_exprstate().on_symbol(tk.text(), p_stack, p_emit_expr); return; + } case exprstatetype::expect_type: { TypeDescr td = nullptr; @@ -321,20 +448,27 @@ namespace xo { return; } + case exprstatetype::expr_progress: + /* illegal input, e.g.
+ * foo bar + */ + assert(false); + return; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ assert(false); return; } - } + } /*on_symbol*/ void exprstate::on_typedescr(TypeDescr td, exprstatestack * /*p_stack*/, rp * /*p_emit_expr*/) { - switch(this->exs_type_) { + switch (this->exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: case exprstatetype::def_0: case exprstatetype::def_1: @@ -366,6 +500,10 @@ namespace xo { assert(false); return; + case exprstatetype::expr_progress: + assert(false); + return; + case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -392,7 +530,36 @@ namespace xo { if (this->exs_type_ == exprstatetype::def_1) { this->exs_type_ = exprstatetype::def_2; - p_stack->push_exprstate(exprstatetype::expect_type); + p_stack->push_exprstate(exprstate::expect_type()); + } else { + assert(false); + } + } + + void + exprstate::on_semicolon(exprstatestack * p_stack, + rp * p_emit_expr) + { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag)); + + constexpr const char * self_name = "exprstate::on_semicolon"; + + if (!this->admits_semicolon()) + { + throw std::runtime_error(tostr(self_name, + ": unexpected semicolon for parsing state", + xtag("state", *this))); + } + + if (this->exs_type_ == exprstatetype::expr_progress) { + rp expr = this->gen_expr_; + + p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */ + + p_stack->top_exprstate().on_expr(expr, + p_stack, + p_emit_expr); } else { assert(false); } @@ -417,16 +584,51 @@ namespace xo { { this->exs_type_ = exprstatetype::def_4; - p_stack->push_exprstate(exprstatetype::expect_rhs_expression); + p_stack->push_exprstate(exprstate::expect_rhs_expression()); } else { assert(false); } } +#ifdef OBSOLETE + /** + consider input: + + x := y(foo()) + + Is that an assignment x:=y followed by function call (foo()) ?
+ Or assignment with rhs calling a function y() with argument foo() + + policy: forbid parenthesis as beginning of a toplevel expression + **/ + + void + exprstate::on_leftparen(exprstatestack * p_stack, + rp * p_emit_expr) + { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag)); + + if (!this->admits_leftparen()) + { + throw std::runtime_error(tostr(self_name, + ": unexpected leftparen '(' for parsing state", + xtag("state", *this))); + } + + if (this->exs_type_ == exprstatetype::expect_rhs_expression) { + /* push lparen_0 to remember to look for subsequent rightparen */ + p_stack->push_exprstate(exprstatetype::lparen_0); + + p_stack->push_exprstate(exprstatetype::expect_rhs_expression); + } + } +#endif + void exprstate::on_f64(const token_type & tk, exprstatestack * p_stack, - rp * p_emit_expr) + rp * /*p_emit_expr*/) { constexpr bool c_debug_flag = true; scope log(XO_DEBUG(c_debug_flag)); @@ -441,11 +643,9 @@ namespace xo { } if (this->exs_type_ == exprstatetype::expect_rhs_expression) { - p_stack->pop_exprstate(); - - p_stack->top_exprstate().on_expr(Constant::make(tk.f64_value()), - p_stack, - p_emit_expr); + p_stack->push_exprstate + (exprstate::make_expr_progress + (Constant::make(tk.f64_value()))); } else { assert(false); } @@ -461,7 +661,7 @@ namespace xo { log && log(xtag("tk", tk)); log && log(xtag("state", *this)); - switch(tk.tk_type()) { + switch (tk.tk_type()) { case tokentype::tk_def: this->on_def(p_stack); @@ -503,10 +703,13 @@ namespace xo { return; case tokentype::tk_doublecolon: - case tokentype::tk_semicolon: assert(false); return; + case tokentype::tk_semicolon: + this->on_semicolon(p_stack, p_emit_expr); + return; + case tokentype::tk_singleassign: this->on_singleassign(p_stack); return; @@ -538,7 +741,13 @@ namespace xo { exprstatestack * p_stack, rp * p_emit_expr) { - switch(this->exs_type_) { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag)); + + log && log(xtag("exstype", this->exs_type_), +
xtag("expr", expr)); + + switch (this->exs_type_) { case exprstatetype::expect_toplevel_expression_sequence: /* toplevel expression sequence accepts an * arbitrary number of expressions. @@ -546,7 +755,7 @@ namespace xo { * parser::include_token() returns */ - *p_emit_expr = expr.get(); + *p_emit_expr = expr.promote(); return; case exprstatetype::def_0: case exprstatetype::def_1: @@ -580,7 +789,17 @@ namespace xo { return; } - case exprstatetype::expect_rhs_expression: + case exprstatetype::expect_rhs_expression: { + + p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */ + + p_stack->top_exprstate().on_expr(expr, + p_stack, + p_emit_expr); + + return; + } + case exprstatetype::expect_type: case exprstatetype::expect_symbol: /* unreachable @@ -588,13 +807,18 @@ namespace xo { */ assert(false); return; + case exprstatetype::expr_progress: + /* consecutive expressions aren't legal + */ + assert(false); + return; case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ assert(false); return; } - } + } /*on_expr*/ void exprstate::on_symbol(const std::string & symbol_name, @@ -634,6 +858,9 @@ namespace xo { */ assert(false); return; + case exprstatetype::expr_progress: + assert(false); + return; case exprstatetype::invalid: case exprstatetype::n_exprstatetype: /* unreachable */ @@ -645,9 +872,11 @@ namespace xo { void exprstate::print(std::ostream & os) const { os << ""; } @@ -667,6 +896,10 @@ namespace xo { void exprstatestack::push_exprstate(const exprstate & exs) { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag), + xtag("exs", exs)); + std::size_t z = stack_.size(); stack_.resize(z+1); @@ -676,6 +909,10 @@ namespace xo { void exprstatestack::pop_exprstate() { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag), + xtag("top.exstype", top_exprstate().exs_type())); + std::size_t z = stack_.size(); if (z > 0) @@ -714,7 +951,7 @@ namespace xo { parser::include_token(const token_type & tk) {
constexpr bool c_debug_flag = true; - scope log(XO_DEBUG(c_debug_flag)); + scope log(XO_DEBUG(c_debug_flag), xtag("tk", tk)); if (xs_stack_.empty()) { throw std::runtime_error(tostr("parser::include_token", @@ -729,6 +966,7 @@ namespace xo { xs_stack_.top_exprstate().on_input(tk, &xs_stack_, &retval); + log && log(xtag("retval", retval)); return retval; } /*include_token*/ diff --git a/src/reader/reader.cpp b/src/reader/reader.cpp index c24ebf94..d08b9552 100644 --- a/src/reader/reader.cpp +++ b/src/reader/reader.cpp @@ -17,6 +17,9 @@ namespace xo { reader_result reader::read_expr(const span_type & input_arg, bool eof) { + constexpr bool c_debug_flag = true; + scope log(XO_DEBUG(c_debug_flag)); + span_type input = input_arg; /* input text-span consumed by this call. @@ -32,7 +35,13 @@ namespace xo { const auto & tk = sr.first; const span_type & used_span = sr.second; + log && log(xtag("used_span", used_span)); + log && log(xtag("input.pre", input)); + input = input.after_prefix(used_span); + + log && log(xtag("expr_span.pre", expr_span)); + expr_span += used_span; if (tk.is_valid()) { @@ -40,6 +49,9 @@ namespace xo { auto expr = this->parser_.include_token(tk); if (expr) { + log && log(xtag("outcome", "victory!"), + xtag("expr", expr)); + /* token completes an expression -> victory */ return reader_result(expr, expr_span); } else { @@ -48,7 +60,6 @@ namespace xo { * * input span may contain more tokens -> iterate */ - input = input.after_prefix(used_span); } } else { assert(input.empty()); @@ -76,6 +87,8 @@ namespace xo { } } + log && log(xtag("outcome", "noop")); + return reader_result(nullptr, expr_span); } diff --git a/utest/parser.test.cpp b/utest/parser.test.cpp index aa230c7a..58910bda 100644 --- a/utest/parser.test.cpp +++ b/utest/parser.test.cpp @@ -201,7 +201,43 @@ namespace xo { cerr << "parser state after [def foo : f64 = 3.14159265]" << endl; cerr << parser << endl; - REQUIRE(r6.get() != nullptr); + REQUIRE(r6.get() == nullptr); + + /* stack should be +
* + * index 0 first: expr_progress, expect_rhs_expression, def_4, expect_toplevel_expression_sequence + */ + CHECK(parser.stack_size() == 4); + if (parser.stack_size() > 0) + CHECK(parser.i_exstype(0) == exprstatetype::expr_progress); + if (parser.stack_size() > 1) + CHECK(parser.i_exstype(1) == exprstatetype::expect_rhs_expression); + if (parser.stack_size() > 2) + CHECK(parser.i_exstype(2) == exprstatetype::def_4); + if (parser.stack_size() > 3) + CHECK(parser.i_exstype(3) + == exprstatetype::expect_toplevel_expression_sequence); + } + + /* input: + * + * i_tc==0: + * def foo = 3.14159265 ; + * ^ ^ + * 0 1 + * + * i_tc==1: + * def foo : f64 = 3.14159265 ; + * ^ ^ + * 0 1 + */ + { + auto r7 = parser.include_token(token_type::semicolon()); + + cerr << "parser state after [def foo : f64 = 3.14159265;]" << endl; + cerr << parser << endl; + + REQUIRE(r7.get() != nullptr); CHECK(parser.stack_size() == 1); diff --git a/utest/reader.test.cpp b/utest/reader.test.cpp index 376dbf05..eff43622 100644 --- a/utest/reader.test.cpp +++ b/utest/reader.test.cpp @@ -18,11 +18,20 @@ namespace xo { rdr.begin_translation_unit(); try { - auto rr = rdr.read_expr(reader::span_type::from_cstr("def foo : f64 = 3.14159265"), - true /*eof*/); + auto input + = reader::span_type::from_cstr("def foo : f64 = 3.14159265;"); + auto rr + = rdr.read_expr(input, true /*eof*/); REQUIRE(rr.expr_.get()); - REQUIRE(rr.rem_.empty()); + + log && log(xtag("expr", rr.expr_)); + + input = input.after_prefix(rr.rem_); + + log && log(xtag("post.input", input)); + + REQUIRE(input.empty()); } catch (std::exception & ex) { log && log(ex.what());