diff --git a/xo-reader/include/xo/reader/define_xs.hpp b/xo-reader/include/xo/reader/define_xs.hpp
index a2c2616a..d34b3254 100644
--- a/xo-reader/include/xo/reader/define_xs.hpp
+++ b/xo-reader/include/xo/reader/define_xs.hpp
@@ -80,6 +80,9 @@ namespace xo {
 
             defexprstatetype defxs_type() const { return defxs_type_; }
 
+            /** @return expected input in current state (never nullptr) **/
+            virtual const char * get_expect_str() const override;
+
             virtual void on_expr(bp<Expression> expr,
                                  parserstatemachine * p_psm) override;
             virtual void on_expr_with_semicolon(bp<Expression> expr,
diff --git a/xo-reader/include/xo/reader/exprstate.hpp b/xo-reader/include/xo/reader/exprstate.hpp
index 2ea52732..529b6a06 100644
--- a/xo-reader/include/xo/reader/exprstate.hpp
+++ b/xo-reader/include/xo/reader/exprstate.hpp
@@ -105,6 +105,9 @@ namespace xo {
             void on_input(const token_type & tk,
                           parserstatemachine * p_psm);
 
+            /** @return string describing expected/allowed input in current state **/
+            virtual const char * get_expect_str() const;
+
             /** update exprstate in response to a successfully-parsed subexpression **/
             virtual void on_expr(bp<Expression> expr,
                                  parserstatemachine * p_psm);
@@ -189,6 +192,57 @@ namespace xo {
             void illegal_input_error(const char * self_name,
                                      const token_type & tk) const;
 
+            /** report error through @p p_psm (on_error) when expression
+             *  @p expr is inconsistent with parsing state
+             *
+             *  @p self_name   error detected in this (c++) function
+             *  @p expr        offending input expression
+             *  @p expect_str  indicate expected input in this state
+             *  @p p_psm       parser state machine
+             **/
+            void illegal_input_on_expr(const char * self_name,
+                                       bp<Expression> expr,
+                                       const char * expect_str,
+                                       parserstatemachine * p_psm) const;
+
+            /** report error through @p p_psm (on_error) when token
+             *  @p tk is inconsistent with parsing state
+             *
+             *  @p self_name   error detected in this (c++) function
+             *  @p tk          offending input token
+             *  @p expect_str  indicate expected input in this state
+             *  @p p_psm       parser state machine
+             **/
+            void illegal_input_on_token(const char * self_name,
+                                        const token_type & tk,
+                                        const char * expect_str,
+                                        parserstatemachine * p_psm) const;
+
+            /** report error through @p p_psm (on_error) when symbol
+             *  @p symbol_name is inconsistent with parsing state
+             *
+             *  @p self_name   error detected in this (c++) function
+             *  @p symbol_name offending symbol name
+             *  @p expect_str  indicate expected input in this state
+             *  @p p_psm       parser state machine
+             **/
+            void illegal_input_on_symbol(const char * self_name,
+                                         const std::string & symbol_name,
+                                         const char * expect_str,
+                                         parserstatemachine * p_psm) const;
+
+            /** report error through @p p_psm (on_error) when typename not expected in current parsing state
+             *
+             *  @p self_name   error detected in this (c++) function
+             *  @p td          offending type description
+             *  @p expect_str  indicate expected input in this state
+             *  @p p_psm       parser state machine
+             **/
+            void illegal_input_on_type(const char * self_name,
+                                       TypeDescr td,
+                                       const char * expect_str,
+                                       parserstatemachine * p_psm) const;
+
             /** capture error in @p *p_psm when unable to locate definition for a variable **/
             void unknown_variable_error(const char * self_name,
                                         const token_type & tk,
diff --git a/xo-reader/src/reader/define_xs.cpp b/xo-reader/src/reader/define_xs.cpp
index 6a8f4c7c..30dcffa2 100644
--- a/xo-reader/src/reader/define_xs.cpp
+++ b/xo-reader/src/reader/define_xs.cpp
@@ -58,6 +58,47 @@ namespace xo {
               def_expr_{std::move(def_expr)}
         {}
 
+        const char *
+        define_xs::get_expect_str() const
+        {
+            /*
+             *   def foo       = 1 ;
+             *   def foo : f64 = 1 ;
+             *  ^   ^   ^ ^   ^ ^ ^ ^
+             *  |   |   | |   | | | (done)
+             *  |   |   | |   | | def_6
+             *  |   |   | |   | def_5:expect_rhs_expression
+             *  |   |   | |   def_4
+             *  |   |   | def_3:expect_type
+             *  |   |   def_2
+             *  |   def_1:expect_symbol
+             *  expect_toplevel_expression_sequence
+             *
+             * note that we skip from def_2 -> def_5 if '=' instead of ':'
+             */
+            switch (this->defxs_type_) {
+            case defexprstatetype::invalid:
+            case defexprstatetype::def_0:
+            case defexprstatetype::n_defexprstatetype:
+                assert(false); // impossible
+                return "?expect"; // not nullptr: callers format this into an error message
+            case defexprstatetype::def_1:
+                return "symbol";
+            case defexprstatetype::def_2:
+                return "singleassign|colon";
+            case defexprstatetype::def_3:
+                return "type";
+            case defexprstatetype::def_4:
+                return "singleassign";
+            case defexprstatetype::def_5:
+                return "expression";
+            case defexprstatetype::def_6:
+                return "semicolon";
+            }
+
+            return "?expect";
+        }
+
         void
         define_xs::on_expr(bp<Expression> expr,
                            parserstatemachine * p_psm)
@@ -85,9 +126,13 @@ namespace xo {
                 rp<Expression> def_expr = this->def_expr_;
 
                 this->defxs_type_ = defexprstatetype::def_6;
-            } else {
-                exprstate::on_expr(expr, p_psm);
+                return; // expression accepted; must not fall through to the error report below
             }
+
+            constexpr const char * c_self_name = "define_xs::on_expr";
+            const char * exp = get_expect_str();
+
+            this->illegal_input_on_expr(c_self_name, expr, exp, p_psm);
         }
 
         void
@@ -117,9 +162,12 @@ namespace xo {
                 this->defxs_type_ = defexprstatetype::def_2;
                 this->def_expr_->assign_lhs_name(symbol_name);
                 return;
-            } else {
-                exprstate::on_symbol(symbol_name, p_psm);
             }
+
+            constexpr const char * c_self_name = "define_xs::on_symbol";
+            const char * exp = this->get_expect_str();
+
+            this->illegal_input_on_symbol(c_self_name, symbol_name, exp, p_psm);
         }
 
         void
@@ -136,12 +184,13 @@ namespace xo {
                 this->cvt_expr_ = ConvertExprAccess::make(td /*dest_type*/,
                                                           nullptr /*source_expr*/);
                 this->def_expr_->assign_rhs(this->cvt_expr_);
-                //this->def_lhs_td_ = td;
-
                 return;
-            } else {
-                exprstate::on_typedescr(td, p_psm);
             }
+
+            constexpr const char * c_self_name = "define_xs::on_typedescr";
+            const char * exp = this->get_expect_str();
+
+            this->illegal_input_on_type(c_self_name, td, exp, p_psm);
         }
 
         void
@@ -157,9 +206,13 @@ namespace xo {
                 this->defxs_type_ = defexprstatetype::def_1;
 
                 expect_symbol_xs::start(p_psm);
-            } else {
-                exprstate::on_def_token(tk, p_psm);
+                return;
             }
+
+            constexpr const char * c_self_name = "define_xs::on_def_token";
+            const char * exp = this->get_expect_str();
+
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
@@ -175,9 +228,13 @@ namespace xo {
                 this->defxs_type_ = defexprstatetype::def_3;
 
                 expect_type_xs::start(p_psm);
-            } else {
-                exprstate::on_colon_token(tk, p_psm);
+                return;
             }
+
+            constexpr const char * c_self_name = "define_xs::on_colon_token";
+            const char * exp = this->get_expect_str();
+
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
@@ -223,9 +280,13 @@ namespace xo {
                 }
 
                 p_psm->top_exprstate().on_expr(def_expr, p_psm);
-            } else {
-                exprstate::on_semicolon_token(tk, p_psm);
+                return;
             }
+
+            constexpr const char * c_self_name = "define_xs::on_semicolon_token";
+            const char * exp = this->get_expect_str();
+
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
@@ -235,70 +296,59 @@ namespace xo {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "define_xs::on_singleassign_token";
-
             log && log("defxs_type", defxs_type_);
 
-            /*
-             *   def foo       = 1 ;
-             *   def foo : f64 = 1 ;
-             *  ^   ^   ^ ^   ^ ^ ^ ^
-             *  |   |   | |   | | | (done)
-             *  |   |   | |   | | def_6
-             *  |   |   | |   | def_5:expect_rhs_expression
-             *  |   |   | |   def_4
-             *  |   |   | def_3:expect_type
-             *  |   |   def_2
-             *  |   def_1:expect_symbol
-             *  expect_toplevel_expression_sequence
-             *
-             * note that we skip from def_2 -> def_5 if '=' instead of ':'
-             */
             if ((this->defxs_type_ == defexprstatetype::def_2)
                 || (this->defxs_type_ == defexprstatetype::def_4))
             {
                 this->defxs_type_ = defexprstatetype::def_5;
-
                 expect_expr_xs::start(p_psm);
-            } else {
-                this->illegal_input_error(self_name, tk);
+                return;
             }
+
+            constexpr const char * c_self_name = "define_xs::on_singleassign_token";
+            const char * exp = get_expect_str();
+
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         define_xs::on_rightparen_token(const token_type & tk,
-                                       parserstatemachine * /*p_psm*/)
+                                       parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "define_xs::on_rightparen";
+            constexpr const char * c_self_name = "define_xs::on_rightparen";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         define_xs::on_i64_token(const token_type & tk,
-                                parserstatemachine * /*p_psm*/)
+                                parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "define_xs::on_i64";
+            constexpr const char * c_self_name = "define_xs::on_i64";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         define_xs::on_f64_token(const token_type & tk,
-                                parserstatemachine * /*p_psm*/)
+                                parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "define_xs::on_f64";
+            constexpr const char * c_self_name = "define_xs::on_f64";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
diff --git a/xo-reader/src/reader/exprstate.cpp b/xo-reader/src/reader/exprstate.cpp
index 20596b27..ca8ce179 100644
--- a/xo-reader/src/reader/exprstate.cpp
+++ b/xo-reader/src/reader/exprstate.cpp
@@ -88,8 +88,9 @@ namespace xo {
             log && log(xtag("exstype", p_psm->top_exprstate().exs_type()));
 
             constexpr const char * c_self_name = "exprstate::on_symbol_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(c_self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
@@ -154,133 +155,144 @@ namespace xo {
 
         void
         exprstate::on_colon_token(const token_type & tk,
-                                  parserstatemachine * /*p_psm*/)
+                                  parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_colon";
+            constexpr const char * c_self_name = "exprstate::on_colon";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_comma_token(const token_type & tk,
-                                  parserstatemachine * /*p_psm*/)
+                                  parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_comma";
+            constexpr const char * c_self_name = "exprstate::on_comma";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_semicolon_token(const token_type & tk,
-                                      parserstatemachine * /*p_psm*/)
+                                      parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_semicolon";
+            constexpr const char * c_self_name = "exprstate::on_semicolon";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_singleassign_token(const token_type & tk,
-                                         parserstatemachine * /*p_psm*/) {
+                                         parserstatemachine * p_psm) {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_singleassign_token";
+            constexpr const char * c_self_name = "exprstate::on_singleassign_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_leftparen_token(const token_type & tk,
-                                      parserstatemachine * /*p_psm*/)
+                                      parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_leftparen_token";
+            constexpr const char * c_self_name = "exprstate::on_leftparen_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_rightparen_token(const token_type & tk,
-                                       parserstatemachine * /*p_psm*/)
+                                       parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_rightparen";
+            constexpr const char * c_self_name = "exprstate::on_rightparen";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_leftbrace_token(const token_type & tk,
-                                      parserstatemachine * /*p_psm*/)
+                                      parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_leftbrace_token";
+            constexpr const char * c_self_name = "exprstate::on_leftbrace_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_rightbrace_token(const token_type & tk,
-                                       parserstatemachine * /*p_psm*/)
+                                       parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_rightbrace_token";
+            constexpr const char * c_self_name = "exprstate::on_rightbrace_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_operator_token(const token_type & tk,
-                                     parserstatemachine * /*p_psm*/)
+                                     parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_operator_token";
+            constexpr const char * c_self_name = "exprstate::on_operator_token";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_i64_token(const token_type & tk,
-                                parserstatemachine * /*p_psm*/)
+                                parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_i64";
+            constexpr const char * c_self_name = "exprstate::on_i64";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
         exprstate::on_f64_token(const token_type & tk,
-                                parserstatemachine * /*p_psm*/)
+                                parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            constexpr const char * self_name = "exprstate::on_f64";
+            constexpr const char * c_self_name = "exprstate::on_f64";
+            const char * exp = get_expect_str();
 
-            this->illegal_input_error(self_name, tk);
+            this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
         }
 
         void
@@ -395,9 +407,15 @@ namespace xo {
             assert(false);
         }
 
+        const char *
+        exprstate::get_expect_str() const
+        {
+            return "?expect";
+        }
+
         void
         exprstate::on_expr(bp<Expression> expr,
-                           parserstatemachine * /*p_psm*/)
+                           parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
@@ -405,33 +423,31 @@ namespace xo {
             log && log(xtag("exstype", this->exs_type_),
                        xtag("expr", expr));
 
-            assert(false);
+            constexpr const char * c_self_name = "exprstate::on_expr";
+            const char * exp = get_expect_str();
+
+            this->illegal_input_on_expr(c_self_name, expr, exp, p_psm);
         } /*on_expr*/
 
         void
         exprstate::on_expr_with_semicolon(bp<Expression> expr,
-                                          parserstatemachine * /*p_psm*/)
+                                          parserstatemachine * p_psm)
         {
             constexpr bool c_debug_flag = true;
             scope log(XO_DEBUG(c_debug_flag));
 
-            const char * c_self_name = "exprstate::on_expr_with_semicolon";
-
             log && log(xtag("exstype", this->exs_type_),
                        xtag("expr", expr));
 
-            throw std::runtime_error
-                (tostr(c_self_name,
-                       ": unexpected expression for parsing state",
-                       xtag("expr", expr),
-                       xtag("state", *this)));
+            constexpr const char * c_self_name = "exprstate::on_expr_with_semicolon";
+            const char * exp = get_expect_str();
 
-            assert(false);
+            this->illegal_input_on_expr(c_self_name, expr, exp, p_psm);
         } /*on_expr_with_semicolon*/
 
         void
         exprstate::on_symbol(const std::string & symbol_name,
-                             parserstatemachine * /*p_psm*/)
+                             parserstatemachine * p_psm)
         {
             /* unreachable - derived class that can receive
              * will override this method
@@ -442,7 +458,10 @@ namespace xo {
             log && log(xtag("exstype", this->exs_type_),
                        xtag("symbol_name", symbol_name));
 
-            assert(false);
+            constexpr const char * c_self_name = "exprstate::on_symbol";
+            const char * exp = get_expect_str();
+
+            this->illegal_input_on_symbol(c_self_name, symbol_name, exp, p_psm);
         }
 
         void
@@ -464,6 +483,63 @@ namespace xo {
                        xtag("state", *this)));
         }
 
+        void
+        exprstate::illegal_input_on_expr(const char * self_name,
+                                         bp<Expression> expr,
+                                         const char * expect_str,
+                                         parserstatemachine * p_psm) const
+        {
+            std::string errmsg = tostr("unexpected expression for parsing state",
+                                       xtag("expecting", expect_str),
+                                       xtag("expr", expr),
+                                       xtag("state", this->exs_type()));
+
+            p_psm->on_error(self_name, std::move(errmsg));
+        }
+
+        void
+        exprstate::illegal_input_on_token(const char * self_name,
+                                          const token_type & tk,
+                                          const char * expect_str,
+                                          parserstatemachine * p_psm) const
+        {
+            std::string errmsg = tostr("unexpected token for parsing state",
+                                       xtag("expecting", expect_str),
+                                       xtag("token", tk.tk_type()),
+                                       xtag("text", tk.text()),
+                                       xtag("state", this->exs_type()));
+
+            p_psm->on_error(self_name, std::move(errmsg));
+        }
+
+        void
+        exprstate::illegal_input_on_symbol(const char * self_name,
+                                           const std::string & symbol,
+                                           const char * expect_str,
+                                           parserstatemachine * p_psm) const
+        {
+            std::string errmsg = tostr("unexpected symbol",
+                                       xtag("expecting", expect_str),
+                                       xtag("symbol", symbol),
+                                       xtag("state", this->exs_type()));
+
+            p_psm->on_error(self_name, std::move(errmsg));
+        }
+
+        void
+        exprstate::illegal_input_on_type(const char * self_name,
+                                         const TypeDescr td,
+                                         const char * expect_str,
+                                         parserstatemachine * p_psm) const
+        {
+            std::string errmsg = tostr("unexpected type",
+                                       xtag("expecting", expect_str),
+                                       xtag("type", td),
+                                       xtag("state", this->exs_type()));
+
+            p_psm->on_error(self_name, std::move(errmsg));
+        }
+
         void
         exprstate::unknown_variable_error(const char * self_name,
                                           const token_type & tk,
diff --git a/xo-tokenizer/include/xo/tokenizer/token.hpp b/xo-tokenizer/include/xo/tokenizer/token.hpp
index 906ad093..5a053d11 100644
--- a/xo-tokenizer/include/xo/tokenizer/token.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/token.hpp
@@ -159,6 +159,12 @@ namespace xo {
             /** true for sentinel token with type tk_invalid **/
             bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; }
 
+            /** true for tokens with variable text. false for those with fixed textual representation **/
+            bool has_variable_text() const { return (tk_type_ == tokentype::tk_i64
+                                                     || tk_type_ == tokentype::tk_f64
+                                                     || tk_type_ == tokentype::tk_string
+                                                     || tk_type_ == tokentype::tk_symbol); }
+
             /** expect input matching @c "[+|-][0-9][0-9]*" **/
             std::int64_t i64_value() const;
 
@@ -406,9 +412,10 @@ namespace xo {
         void
         token<CharT>::print(std::ostream & os) const {
             os << "<token"
-               << xtag("type", tk_type_)
-               << xtag("text", text_)
-               << ">";
+               << xtag("type", tk_type_);
+            if (has_variable_text())
+                os << xtag("text", text_);
+            os << ">";
         } /*print*/
 
         template <typename CharT>