detailed parser error reporting [wip - 1 example]

This commit is contained in:
Roland Conybeare 2025-07-19 21:09:57 -05:00
commit 65a83cd77c
19 changed files with 256 additions and 84 deletions

View file

@ -2,6 +2,7 @@
set(SELF_LIB xo_reader)
set(SELF_SRCS
parser_result.cpp
parser.cpp
parserstatemachine.cpp
reader.cpp

View file

@ -37,8 +37,6 @@ namespace xo {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
//constexpr const char * c_self_name = "exprseq_xs::on_def_token";
define_xs::start(p_psm);
/* keyword 'def' introduces a definition:
@ -68,7 +66,7 @@ namespace xo {
if (var.get()) {
progress_xs::start(var.promote(), p_psm);
} else {
this->unknown_variable_error(c_self_name, tk);
this->unknown_variable_error(c_self_name, tk, p_psm);
}
} else {
/* policy: don't allow variable references as toplevel expressions
@ -117,10 +115,9 @@ namespace xo {
* arbitrary number of expressions.
*/
auto p_emit_expr = p_psm->p_emit_expr_;
*p_emit_expr = expr.promote();
} /*on_expr*/
*(p_psm->p_result_) = parser_result::expression(expr.promote());
}
void
exprseq_xs::on_expr_with_semicolon(bp<Expression> expr,
@ -132,9 +129,7 @@ namespace xo {
* semicolons are sometimes mandatory to avoid ambiguity.
*/
auto p_emit_expr = p_psm->p_emit_expr_;
*p_emit_expr = expr.promote();
*(p_psm->p_result_) = parser_result::expression(expr.promote());
}
} /*namespace scm*/

View file

@ -466,12 +466,13 @@ namespace xo {
void
exprstate::unknown_variable_error(const char * self_name,
const token_type & tk) const
const token_type & tk,
parserstatemachine * p_psm) const
{
throw std::runtime_error
(tostr(self_name,
": unknown variable name",
xtag("var", tk.text())));
std::string errmsg = tostr("unknown variable name",
xtag("var", tk.text()));
p_psm->on_error(self_name, std::move(errmsg));
}
} /*namespace scm*/
} /*namespace xo*/

View file

@ -30,7 +30,7 @@ namespace xo {
// ----- parser -----
parser::parser()
: xs_stack_{}, env_stack_{}
: xs_stack_{}, env_stack_{}, result_{}
{
/* top-level environment. initially empty */
rp<LocalEnv> toplevel_env = LocalEnv::make_empty();
@ -46,25 +46,23 @@ namespace xo {
void
parser::begin_interactive_session() {
/* note: not using emit expr here */
parserstatemachine psm(&xs_stack_,
&env_stack_,
nullptr /*p_emit_expr*/);
&result_);
exprseq_xs::start(exprseqtype::toplevel_interactive, &psm);
}
void
parser::begin_translation_unit() {
/* note: not using emit expr here */
parserstatemachine psm(&xs_stack_,
&env_stack_,
nullptr /*p_emit_expr*/);
&result_);
exprseq_xs::start(exprseqtype::toplevel_batch, &psm);
}
rp<Expression>
const parser_result &
parser::include_token(const token_type & tk)
{
constexpr bool c_debug_flag = true;
@ -81,15 +79,11 @@ namespace xo {
log && log(xtag("top", xs_stack_.top_exprstate()));
rp<Expression> retval;
parserstatemachine psm(&xs_stack_, &env_stack_, &retval);
parserstatemachine psm(&xs_stack_, &env_stack_, &result_);
xs_stack_.top_exprstate().on_input(tk, &psm);
log && log(xtag("retval", retval));
return retval;
return result_;
} /*include_token*/
void
@ -97,6 +91,7 @@ namespace xo {
{
xs_stack_.reset_to_toplevel();
env_stack_.reset_to_toplevel();
result_ = parser_result::none();
} /*discard_current_state*/
void

View file

@ -0,0 +1,44 @@
/* file parser_result.cpp
*
* author: Roland Conybeare, Jul 2025
*/
#include "parser_result.hpp"
namespace xo {
namespace scm {
/* Construct a parser result from its four components.
 * Each argument initializes the correspondingly-named member:
 *   type    -> result_type_
 *   expr    -> result_expr_        (moved in)
 *   src_fn  -> error_src_function_ (pointer stored as-is, not copied)
 *   errmsg  -> error_description_  (moved in)
 */
parser_result::parser_result(parser_result_type type,
                             rp<Expression> expr,
                             const char * src_fn,
                             std::string errmsg)
    : result_type_{type},
      result_expr_{std::move(expr)},
      error_src_function_{src_fn},
      error_description_{std::move(errmsg)}
{
}
/* Factory: result tagged parser_result_type::none.
 * Carries a null expression, a null source-function name
 * and an empty error description.
 */
parser_result
parser_result::none()
{
    parser_result empty_result(parser_result_type::none,
                               nullptr /*expr*/,
                               nullptr /*src_function*/,
                               std::string{} /*errmsg*/);

    return empty_result;
}
/* Factory: result tagged parser_result_type::error.
 *
 * error_src_function: name of the function reporting the error
 *                     (pointer is stored, not copied)
 * errmsg:             error description; moved into the result
 *
 * The expression slot is null.
 */
parser_result
parser_result::error(const char * error_src_function,
                     std::string errmsg)
{
    parser_result error_result(parser_result_type::error,
                               nullptr /*expr*/,
                               error_src_function,
                               std::move(errmsg));

    return error_result;
}
/* Factory: result tagged parser_result_type::expression.
 *
 * expr: expression to carry; moved into the result.
 *
 * The error fields are left empty
 * (null source-function name, empty description).
 */
parser_result
parser_result::expression(rp<Expression> expr)
{
    parser_result expr_result(parser_result_type::expression,
                              std::move(expr),
                              nullptr /*src_function*/,
                              std::string{} /*errmsg*/);

    return expr_result;
}
} /*namespace scm*/
} /*namespace xo*/
/* end parser_result.cpp */

View file

@ -156,12 +156,17 @@ namespace xo {
->top_exprstate().on_rightbrace_token(tk, this);
}
void
parserstatemachine::on_error(const char * self_name, std::string errmsg)
{
*(this->p_result_) = parser_result::error(self_name, std::move(errmsg));
}
void
parserstatemachine::print(std::ostream & os) const {
os << "<psm";
os << xtag("stack", p_stack_);
os << xtag("env_stack", p_env_stack_);
os << xtag("emit_expr", p_emit_expr_);
os << ">";
}
} /*namespace scm*/

View file

@ -25,15 +25,11 @@ namespace xo {
if (!pps->print_upto_tag("env_stack", x.p_env_stack_))
return false;
if (!pps->print_upto_tag("emit_expr", (void*)x.p_emit_expr_))
return false;
return pps->print_upto(">");
} else {
pps->write("<psm");
pps->newline_pretty_tag(ppii.ci1(), "stack", x.p_stack_);
pps->newline_pretty_tag(ppii.ci1(), "env_stack", x.p_env_stack_);
pps->newline_pretty_tag(ppii.ci1(), "emit_expr", (void*)x.p_emit_expr_);
pps->write(">");
return false;

View file

@ -43,7 +43,7 @@ namespace xo {
/* each loop iteration reads one token */
/* read one token from input */
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof);
log && log(xtag("consumed", used_span));
log && log(xtag("input.pre", input));
@ -53,45 +53,60 @@ namespace xo {
if (tk.is_valid()) {
/* forward just-read token to parser */
auto expr = this->parser_.include_token(tk);
auto parser_result = this->parser_.include_token(tk);
if (expr) {
if (parser_result.is_expression()) {
log && log(xtag("outcome", "victory!"),
xtag("expr", expr));
xtag("expr", parser_result.result_expr()));
/* token completes an expression -> victory */
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
return reader_result(parser_result.result_expr(),
expr_span, parser_.stack_size(), reader_error());
} else if (parser_result.is_error()) {
/* 1. parser detected error.
* 2. tokenizer_.input_state() refers to position just after offending token
* 3. error_pos here is 0 because error detected at token boundary
*/
reader_error error2(parser_result.error_src_function(),
parser_result.error_description(),
tokenizer_.input_state().rewind(tk.text().size()),
0 /*error_pos*/);
std::cout << "parser error pre-report:" << std::endl;
error2.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(), error2);
} else {
/* token did not complete an expression
* (e.g. token for '[')
*
* input span may conotain more tokens -> iterate
* input span may contain more tokens -> iterate
*/
}
} else {
if (error.is_error()) {
if (error1.is_error()) {
/* tokenizer detected an error */
std::cout << "tokenizer error pre-report:" << std::endl;
error.report(std::cout);
error1.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(),
reader_error(error.src_function(),
error.error_description(),
error.input_state(),
error.error_pos()));
reader_error(error1.src_function(),
error1.error_description(),
error1.input_state(),
error1.error_pos()));
} else {
/* control should not come here */
assert(input.empty());
}
/* ono more tokens in input */
/* need more tokens in input */
break;
}
}
/* control here: either
/* control here: either
* 1. input.empty (perhaps ate some whitespace, ok)
* 2. missing or incomplete token (ok unless eof)
*/