diff --git a/include/xo/reader/exprseq_xs.hpp b/include/xo/reader/exprseq_xs.hpp index c41e3cf3..bfd82fe2 100644 --- a/include/xo/reader/exprseq_xs.hpp +++ b/include/xo/reader/exprseq_xs.hpp @@ -29,12 +29,14 @@ namespace xo { * 2. top-level batch * 3. nested * + * @text * 1 2 3 * +-------- * def | y y y * symbol | y n n 1: evaluate as variable * i64 | y n n 1: evaluate as constant * + * @endtext **/ class exprseq_xs : public exprstate { public: diff --git a/include/xo/reader/exprstate.hpp b/include/xo/reader/exprstate.hpp index 0d9b7678..2ea52732 100644 --- a/include/xo/reader/exprstate.hpp +++ b/include/xo/reader/exprstate.hpp @@ -189,10 +189,10 @@ namespace xo { void illegal_input_error(const char * self_name, const token_type & tk) const; - /** throw exception when unable to locate definition for a variable - **/ + /** capture error in @p *p_psm when unable to locate definition for a variable **/ void unknown_variable_error(const char * self_name, - const token_type & tk) const; + const token_type & tk, + parserstatemachine * p_psm) const; protected: /** explicit subtype: identifies derived class **/ diff --git a/include/xo/reader/parser.hpp b/include/xo/reader/parser.hpp index 496a8623..323e9ff6 100644 --- a/include/xo/reader/parser.hpp +++ b/include/xo/reader/parser.hpp @@ -7,6 +7,7 @@ #include "exprstatestack.hpp" #include "envframestack.hpp" +#include "parser_result.hpp" #include namespace xo { @@ -210,7 +211,7 @@ namespace xo { - * @return parsed expression, if @p tk completes an expression. - * otherwise nullptr + * @return parser-owned result: expression if @p tk completes an expression, + * error if parser detected an error, otherwise none **/ - rp include_token(const token_type & tk); + const parser_result & include_token(const token_type & tk); /** reset to starting parsing state. 
use this after encountering an error, to avoid cascade of @@ -240,6 +241,8 @@ namespace xo { **/ envframestack env_stack_; + /** parser result state **/ + parser_result result_; }; /*parser*/ inline std::ostream & diff --git a/include/xo/reader/parser_result.hpp b/include/xo/reader/parser_result.hpp new file mode 100644 index 00000000..204a29a6 --- /dev/null +++ b/include/xo/reader/parser_result.hpp @@ -0,0 +1,65 @@ +/* file parser_result.hpp + * + * author: Roland Conybeare, Jul 2025 + */ + +#pragma once + +#include "xo/expression/Expression.hpp" + +namespace xo { + namespace scm { + enum class parser_result_type { + /** no result yet (incomplete expression) **/ + none, + /** emit expression **/ + expression, + /** emit error **/ + error + }; + + struct parser_result { + using Expression = xo::ast::Expression; + + public: + parser_result() = default; + parser_result(parser_result_type type, rp expr, const char * src_fn, std::string errmsg); + + static parser_result none(); + static parser_result error(const char * error_src_function, + std::string errmsg); + static parser_result expression(rp expr); + + bool is_none() const { return result_type_ == parser_result_type::none; } + bool is_expression() const { return result_type_ == parser_result_type::expression; } + bool is_error() const { return result_type_ == parser_result_type::error; } + + parser_result_type result_type() const { return result_type_; } + const rp & result_expr() const { return result_expr_; } + const char * error_src_function() const { return error_src_function_; } + const std::string & error_description() const { return error_description_; } + + public: + /** none|expression|error + * + * @text + * result_type | error_src_function | error_description + * -------------+--------------------+------------------- + * none | nullptr | empty + * expression | nullptr | empty + * error | non-null | non-empty + * @endtext + **/ + parser_result_type result_type_ = parser_result_type::none; + /** 
if @ref result_type_ is parser_result_type::expression -> non-null result expression **/ + rp result_expr_; + /** if @ref result_type_ is parser_result_type::error -> non-null source function **/ + const char * error_src_function_ = nullptr; + /** if @ref result_type_ is parser_result_type::error -> non-empty error description **/ + std::string error_description_; + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end parser_result.hpp */ diff --git a/include/xo/reader/parserstatemachine.hpp b/include/xo/reader/parserstatemachine.hpp index 490ac35c..49ec9cef 100644 --- a/include/xo/reader/parserstatemachine.hpp +++ b/include/xo/reader/parserstatemachine.hpp @@ -7,6 +7,7 @@ #include "exprstate.hpp" #include "envframestack.hpp" +#include "parser_result.hpp" namespace xo { namespace scm { @@ -27,10 +28,17 @@ namespace xo { public: parserstatemachine(exprstatestack * p_stack, envframestack * p_env_stack, - rp * p_emit_expr) + parser_result * p_result) : p_stack_{p_stack}, p_env_stack_{p_env_stack}, - p_emit_expr_{p_emit_expr} {} + p_result_{p_result} + {} + + //const parser_result & result() const { return result_; } + //parser_result_state result_state() const { return result_state_; } + //const rp & result_expr() const { return result_expr_; } + //const char * error_src_function() const { return error_src_function_; } + //const std::string & error_description() const { return error_description_; } std::unique_ptr pop_exprstate(); exprstate & top_exprstate(); @@ -57,13 +65,19 @@ namespace xo { void on_expr_with_semicolon(bp expr); void on_symbol(const std::string & symbol); - // ---- parsing inputs ----- + // ----- parsing inputs ----- void on_semicolon_token(const token_type & tk); void on_operator_token(const token_type & tk); void on_leftbrace_token(const token_type & tk); void on_rightbrace_token(const token_type & tk); + // ----- parsing error ----- + + /** @p self_name location (implementation function) where error detected + **/ + void on_error(const 
char * self_name, std::string error_description); + /** write human-readable representation on @p os **/ void print(std::ostream & os) const; @@ -72,13 +86,11 @@ namespace xo { * generally speaking, push when to start new work for nested content; * pop when work complete **/ - exprstatestack * p_stack_; + exprstatestack * p_stack_ = nullptr; /** stack of environment frames, one for each enclosing lambda **/ - envframestack * p_env_stack_; - /** if non-null, store next non-nested complete expressions in - * *p_emit_expr - **/ - rp * p_emit_expr_; + envframestack * p_env_stack_ = nullptr; + /** parser result object **/ + parser_result * p_result_ = nullptr; }; inline std::ostream & diff --git a/include/xo/reader/reader_error.hpp b/include/xo/reader/reader_error.hpp index d23502d6..d61db03e 100644 --- a/include/xo/reader/reader_error.hpp +++ b/include/xo/reader/reader_error.hpp @@ -3,6 +3,8 @@ * Author: Roland Conybeare, Jul 2025 */ +#pragma once + #include "xo/tokenizer/tokenizer_error.hpp" namespace xo { @@ -18,7 +20,7 @@ namespace xo { * @ **/ reader_error(const char * src_function, - const char * error_description, + std::string error_description, const input_state_type & input_state, size_t error_pos) : tk_error_{src_function, error_description, input_state, error_pos} diff --git a/src/reader/CMakeLists.txt b/src/reader/CMakeLists.txt index 9024a19a..7314e974 100644 --- a/src/reader/CMakeLists.txt +++ b/src/reader/CMakeLists.txt @@ -2,6 +2,7 @@ set(SELF_LIB xo_reader) set(SELF_SRCS + parser_result.cpp parser.cpp parserstatemachine.cpp reader.cpp diff --git a/src/reader/exprseq_xs.cpp b/src/reader/exprseq_xs.cpp index 2dc758c6..72d06e5e 100644 --- a/src/reader/exprseq_xs.cpp +++ b/src/reader/exprseq_xs.cpp @@ -37,8 +37,6 @@ namespace xo { constexpr bool c_debug_flag = true; scope log(XO_DEBUG(c_debug_flag)); - //constexpr const char * c_self_name = "exprseq_xs::on_def_token"; - define_xs::start(p_psm); /* keyword 'def' introduces a definition: @@ -68,7 +66,7 @@ 
namespace xo { if (var.get()) { progress_xs::start(var.promote(), p_psm); } else { - this->unknown_variable_error(c_self_name, tk); + this->unknown_variable_error(c_self_name, tk, p_psm); } } else { /* policy: don't allow variable references as toplevel expressions @@ -117,10 +115,9 @@ namespace xo { * arbitrary number of expressions. */ - auto p_emit_expr = p_psm->p_emit_expr_; - *p_emit_expr = expr.promote(); - } /*on_expr*/ + *(p_psm->p_result_) = parser_result::expression(expr.promote()); + } void exprseq_xs::on_expr_with_semicolon(bp expr, @@ -132,9 +129,7 @@ namespace xo { * semicolons are sometimes mandatory to avoid ambiguity. */ - auto p_emit_expr = p_psm->p_emit_expr_; - - *p_emit_expr = expr.promote(); + *(p_psm->p_result_) = parser_result::expression(expr.promote()); } } /*namespace scm*/ diff --git a/src/reader/exprstate.cpp b/src/reader/exprstate.cpp index 13714a38..20596b27 100644 --- a/src/reader/exprstate.cpp +++ b/src/reader/exprstate.cpp @@ -466,12 +466,13 @@ namespace xo { void exprstate::unknown_variable_error(const char * self_name, - const token_type & tk) const + const token_type & tk, + parserstatemachine * p_psm) const { - throw std::runtime_error - (tostr(self_name, - ": unknown variable name", - xtag("var", tk.text()))); + std::string errmsg = tostr("unknown variable name", + xtag("var", tk.text())); + + p_psm->on_error(self_name, std::move(errmsg)); } } /*namespace scm*/ } /*namespace xo*/ diff --git a/src/reader/parser.cpp b/src/reader/parser.cpp index 2d8e3818..85ce0231 100644 --- a/src/reader/parser.cpp +++ b/src/reader/parser.cpp @@ -30,7 +30,7 @@ namespace xo { // ----- parser ----- parser::parser() - : xs_stack_{}, env_stack_{} + : xs_stack_{}, env_stack_{}, result_{} { /* top-level environment. 
initially empty */ rp toplevel_env = LocalEnv::make_empty(); @@ -46,25 +46,23 @@ namespace xo { void parser::begin_interactive_session() { - /* note: not using emit expr here */ parserstatemachine psm(&xs_stack_, &env_stack_, - nullptr /*p_emit_expr*/); + &result_); exprseq_xs::start(exprseqtype::toplevel_interactive, &psm); } void parser::begin_translation_unit() { - /* note: not using emit expr here */ parserstatemachine psm(&xs_stack_, &env_stack_, - nullptr /*p_emit_expr*/); + &result_); exprseq_xs::start(exprseqtype::toplevel_batch, &psm); } - rp + const parser_result & parser::include_token(const token_type & tk) { constexpr bool c_debug_flag = true; @@ -81,15 +79,13 @@ namespace xo { log && log(xtag("top", xs_stack_.top_exprstate())); - rp retval; - - parserstatemachine psm(&xs_stack_, &env_stack_, &retval); + /* result_ outlives this call: clear it so an expression/error left by an earlier token is not reported again */ + result_ = parser_result::none(); + parserstatemachine psm(&xs_stack_, &env_stack_, &result_); xs_stack_.top_exprstate().on_input(tk, &psm); - log && log(xtag("retval", retval)); - - return retval; + return result_; } /*include_token*/ void @@ -97,6 +93,7 @@ namespace xo { { xs_stack_.reset_to_toplevel(); env_stack_.reset_to_toplevel(); + result_ = parser_result::none(); } /*discard_current_state*/ void diff --git a/src/reader/parser_result.cpp b/src/reader/parser_result.cpp new file mode 100644 index 00000000..bf47f405 --- /dev/null +++ b/src/reader/parser_result.cpp @@ -0,0 +1,44 @@ +/* file parser_result.cpp + * + * author: Roland Conybeare, Jul 2025 + */ + +#include "parser_result.hpp" + +namespace xo { + namespace scm { + parser_result::parser_result(parser_result_type type, rp expr, const char * src_fn, std::string errmsg) + : result_type_{type}, result_expr_{std::move(expr)}, error_src_function_{src_fn}, error_description_{std::move(errmsg)} + {} + + parser_result + parser_result::none() + { + return parser_result(parser_result_type::none, + nullptr, + nullptr, + std::string()); + } + + parser_result + parser_result::error(const char * error_src_function, + std::string errmsg) + { 
+ return parser_result(parser_result_type::error, + nullptr, + error_src_function, + std::move(errmsg)); + } + + parser_result + parser_result::expression(rp expr) + { + return parser_result(parser_result_type::expression, + std::move(expr), + nullptr /*src_function*/, + std::string()); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end parser_result.cpp */ diff --git a/src/reader/parserstatemachine.cpp b/src/reader/parserstatemachine.cpp index feb99d53..660c8ede 100644 --- a/src/reader/parserstatemachine.cpp +++ b/src/reader/parserstatemachine.cpp @@ -156,12 +156,17 @@ namespace xo { ->top_exprstate().on_rightbrace_token(tk, this); } + void + parserstatemachine::on_error(const char * self_name, std::string errmsg) + { + *(this->p_result_) = parser_result::error(self_name, std::move(errmsg)); + } + void parserstatemachine::print(std::ostream & os) const { os << ""; } } /*namespace scm*/ diff --git a/src/reader/pretty_parserstatemachine.cpp b/src/reader/pretty_parserstatemachine.cpp index b4649042..e0f01a81 100644 --- a/src/reader/pretty_parserstatemachine.cpp +++ b/src/reader/pretty_parserstatemachine.cpp @@ -25,15 +25,11 @@ namespace xo { if (!pps->print_upto_tag("env_stack", x.p_env_stack_)) return false; - if (!pps->print_upto_tag("emit_expr", (void*)x.p_emit_expr_)) - return false; - return pps->print_upto(">"); } else { pps->write("newline_pretty_tag(ppii.ci1(), "stack", x.p_stack_); pps->newline_pretty_tag(ppii.ci1(), "env_stack", x.p_env_stack_); - pps->newline_pretty_tag(ppii.ci1(), "emit_expr", (void*)x.p_emit_expr_); pps->write(">"); return false; diff --git a/src/reader/reader.cpp b/src/reader/reader.cpp index eecb206d..6e73400e 100644 --- a/src/reader/reader.cpp +++ b/src/reader/reader.cpp @@ -43,7 +43,7 @@ namespace xo { /* each loop iterations reads one token */ /* read one token from input */ - auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof); + auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof); log && 
log(xtag("consumed", used_span)); log && log(xtag("input.pre", input)); @@ -53,45 +53,60 @@ namespace xo { if (tk.is_valid()) { /* forward just-read token to parser */ - auto expr = this->parser_.include_token(tk); + const auto & parser_result = this->parser_.include_token(tk); - if (expr) { + if (parser_result.is_expression()) { log && log(xtag("outcome", "victory!"), - xtag("expr", expr)); + xtag("expr", parser_result.result_expr())); /* token completes an expression -> victory */ - return reader_result(expr, expr_span, parser_.stack_size(), reader_error()); + return reader_result(parser_result.result_expr(), + expr_span, parser_.stack_size(), reader_error()); + } else if (parser_result.is_error()) { + /* 1. parser detected error. + * 2. tokenizer_.input_state() refers to position just after offending token + * 3. error_pos here is 0 because error detected at token boundary + */ + reader_error error2(parser_result.error_src_function(), + parser_result.error_description(), + tokenizer_.input_state().rewind(tk.text().size()), + 0 /*error_pos*/); + + std::cout << "parser error pre-report:" << std::endl; + error2.report(std::cout); + + return reader_result(nullptr, expr_span, parser_.stack_size(), error2); } else { /* token did not complete an expression * (e.g. 
token for '[') * - * input span may conotain more tokens -> iterate + * input span may contain more tokens -> iterate */ } } else { - if (error.is_error()) { + if (error1.is_error()) { /* tokenizer detected an error */ std::cout << "tokenizer error pre-report:" << std::endl; - error.report(std::cout); + error1.report(std::cout); return reader_result(nullptr, expr_span, parser_.stack_size(), - reader_error(error.src_function(), - error.error_description(), - error.input_state(), - error.error_pos())); + reader_error(error1.src_function(), + error1.error_description(), + error1.input_state(), + error1.error_pos())); } else { /* control should not come here */ assert(input.empty()); } - /* ono more tokens in input */ + /* need more tokens in input */ break; } } - /* control here: either + /* control here: either * 1. input.empty (perhaps ate some whitespace, ok) * 2. missing or incomplete token (ok unless eof) */ diff --git a/utest/parser.test.cpp b/utest/parser.test.cpp index e2190f91..e29d030e 100644 --- a/utest/parser.test.cpp +++ b/utest/parser.test.cpp @@ -37,7 +37,11 @@ namespace xo { */ { auto r1 = parser.include_token(token_type::def()); - REQUIRE(r1.get() == nullptr); + + REQUIRE(r1.is_none()); + REQUIRE(r1.result_expr().get() == nullptr); + REQUIRE(r1.error_src_function() == nullptr); + REQUIRE(r1.error_description().empty()); /* stack should be: * @@ -69,7 +73,10 @@ namespace xo { cerr << "parser state after [def foo]" << endl; cerr << parser << endl; - REQUIRE(r2.get() == nullptr); + REQUIRE(r2.is_none()); + REQUIRE(r2.result_expr().get() == nullptr); + REQUIRE(r2.error_src_function() == nullptr); + REQUIRE(r2.error_description().empty()); /* stack should be: * @@ -102,7 +109,10 @@ namespace xo { cerr << "parser state after [def foo :]" << endl; cerr << parser << endl; - REQUIRE(r3.get() == nullptr); + REQUIRE(r3.is_none()); + REQUIRE(r3.result_expr().get() == nullptr); + REQUIRE(r3.error_src_function() == nullptr); + 
REQUIRE(r3.error_description().empty()); /* stack should be: * @@ -134,7 +144,10 @@ namespace xo { cerr << "parser state after [def foo : f64]" << endl; cerr << parser << endl; - REQUIRE(r4.get() == nullptr); + REQUIRE(r4.is_none()); + REQUIRE(r4.result_expr().get() == nullptr); + REQUIRE(r4.error_src_function() == nullptr); + REQUIRE(r4.error_description().empty()); CHECK(parser.stack_size() == 2); @@ -178,7 +191,10 @@ namespace xo { cerr << "parser state after [def foo : f64 =]" << endl; cerr << parser << endl; - REQUIRE(r5.get() == nullptr); + REQUIRE(r5.is_none()); + REQUIRE(r5.result_expr().get() == nullptr); + REQUIRE(r5.error_src_function() == nullptr); + REQUIRE(r5.error_description().empty()); CHECK(parser.stack_size() == 3); @@ -219,7 +235,10 @@ namespace xo { cerr << "parser state after [def foo : f64 = 3.14159265]" << endl; cerr << parser << endl; - REQUIRE(r6.get() == nullptr); + REQUIRE(r6.is_none()); + REQUIRE(r6.result_expr().get() == nullptr); + REQUIRE(r6.error_src_function() == nullptr); + REQUIRE(r6.error_description().empty()); /* stack should be * @@ -258,7 +277,10 @@ namespace xo { cerr << "parser state after [def foo : f64 = 3.14159265;]" << endl; cerr << parser << endl; - REQUIRE(r7.get() != nullptr); + REQUIRE(r7.is_expression()); + REQUIRE(r7.result_expr().get() != nullptr); + REQUIRE(r7.error_src_function() == nullptr); + REQUIRE(r7.error_description().empty()); CHECK(parser.stack_size() == 1);