detailed parser error reporting [wip - 1 example]
This commit is contained in:
parent
8c36bbce28
commit
65a83cd77c
19 changed files with 256 additions and 84 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
set(SELF_LIB xo_reader)
|
||||
set(SELF_SRCS
|
||||
parser_result.cpp
|
||||
parser.cpp
|
||||
parserstatemachine.cpp
|
||||
reader.cpp
|
||||
|
|
|
|||
|
|
@ -37,8 +37,6 @@ namespace xo {
|
|||
constexpr bool c_debug_flag = true;
|
||||
scope log(XO_DEBUG(c_debug_flag));
|
||||
|
||||
//constexpr const char * c_self_name = "exprseq_xs::on_def_token";
|
||||
|
||||
define_xs::start(p_psm);
|
||||
|
||||
/* keyword 'def' introduces a definition:
|
||||
|
|
@ -68,7 +66,7 @@ namespace xo {
|
|||
if (var.get()) {
|
||||
progress_xs::start(var.promote(), p_psm);
|
||||
} else {
|
||||
this->unknown_variable_error(c_self_name, tk);
|
||||
this->unknown_variable_error(c_self_name, tk, p_psm);
|
||||
}
|
||||
} else {
|
||||
/* policy: don't allow variable references as toplevel expressions
|
||||
|
|
@ -117,10 +115,9 @@ namespace xo {
|
|||
* arbitrary number of expressions.
|
||||
*/
|
||||
|
||||
auto p_emit_expr = p_psm->p_emit_expr_;
|
||||
|
||||
*p_emit_expr = expr.promote();
|
||||
} /*on_expr*/
|
||||
*(p_psm->p_result_) = parser_result::expression(expr.promote());
|
||||
}
|
||||
|
||||
void
|
||||
exprseq_xs::on_expr_with_semicolon(bp<Expression> expr,
|
||||
|
|
@ -132,9 +129,7 @@ namespace xo {
|
|||
* semicolons are sometimes mandatory to avoid ambiguity.
|
||||
*/
|
||||
|
||||
auto p_emit_expr = p_psm->p_emit_expr_;
|
||||
|
||||
*p_emit_expr = expr.promote();
|
||||
*(p_psm->p_result_) = parser_result::expression(expr.promote());
|
||||
}
|
||||
|
||||
} /*namespace scm*/
|
||||
|
|
|
|||
|
|
@ -466,12 +466,13 @@ namespace xo {
|
|||
|
||||
void
|
||||
exprstate::unknown_variable_error(const char * self_name,
|
||||
const token_type & tk) const
|
||||
const token_type & tk,
|
||||
parserstatemachine * p_psm) const
|
||||
{
|
||||
throw std::runtime_error
|
||||
(tostr(self_name,
|
||||
": unknown variable name",
|
||||
xtag("var", tk.text())));
|
||||
std::string errmsg = tostr("unknown variable name",
|
||||
xtag("var", tk.text()));
|
||||
|
||||
p_psm->on_error(self_name, std::move(errmsg));
|
||||
}
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ namespace xo {
|
|||
// ----- parser -----
|
||||
|
||||
parser::parser()
|
||||
: xs_stack_{}, env_stack_{}
|
||||
: xs_stack_{}, env_stack_{}, result_{}
|
||||
{
|
||||
/* top-level environment. initially empty */
|
||||
rp<LocalEnv> toplevel_env = LocalEnv::make_empty();
|
||||
|
|
@ -46,25 +46,23 @@ namespace xo {
|
|||
|
||||
void
|
||||
parser::begin_interactive_session() {
|
||||
/* note: not using emit expr here */
|
||||
parserstatemachine psm(&xs_stack_,
|
||||
&env_stack_,
|
||||
nullptr /*p_emit_expr*/);
|
||||
&result_);
|
||||
|
||||
exprseq_xs::start(exprseqtype::toplevel_interactive, &psm);
|
||||
}
|
||||
|
||||
void
|
||||
parser::begin_translation_unit() {
|
||||
/* note: not using emit expr here */
|
||||
parserstatemachine psm(&xs_stack_,
|
||||
&env_stack_,
|
||||
nullptr /*p_emit_expr*/);
|
||||
&result_);
|
||||
|
||||
exprseq_xs::start(exprseqtype::toplevel_batch, &psm);
|
||||
}
|
||||
|
||||
rp<Expression>
|
||||
const parser_result &
|
||||
parser::include_token(const token_type & tk)
|
||||
{
|
||||
constexpr bool c_debug_flag = true;
|
||||
|
|
@ -81,15 +79,11 @@ namespace xo {
|
|||
|
||||
log && log(xtag("top", xs_stack_.top_exprstate()));
|
||||
|
||||
rp<Expression> retval;
|
||||
|
||||
parserstatemachine psm(&xs_stack_, &env_stack_, &retval);
|
||||
parserstatemachine psm(&xs_stack_, &env_stack_, &result_);
|
||||
|
||||
xs_stack_.top_exprstate().on_input(tk, &psm);
|
||||
|
||||
log && log(xtag("retval", retval));
|
||||
|
||||
return retval;
|
||||
return result_;
|
||||
} /*include_token*/
|
||||
|
||||
void
|
||||
|
|
@ -97,6 +91,7 @@ namespace xo {
|
|||
{
|
||||
xs_stack_.reset_to_toplevel();
|
||||
env_stack_.reset_to_toplevel();
|
||||
result_ = parser_result::none();
|
||||
} /*discard_current_state*/
|
||||
|
||||
void
|
||||
|
|
|
|||
44
xo-reader/src/reader/parser_result.cpp
Normal file
44
xo-reader/src/reader/parser_result.cpp
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/* file parser_result.cpp
|
||||
*
|
||||
* author: Roland Conybeare, Jul 2025
|
||||
*/
|
||||
|
||||
#include "parser_result.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/* Memberwise constructor used by the factory functions in this file.
 *
 * type    stored in result_type_
 * expr    moved into result_expr_
 * src_fn  pointer stored as-is in error_src_function_ (string is not copied)
 *         NOTE(review): presumably a string literal or other storage that
 *         outlives this result -- confirm against callers
 * errmsg  moved into error_description_
 */
parser_result::parser_result(parser_result_type type, rp<Expression> expr, const char * src_fn, std::string errmsg)
|
||||
: result_type_{type}, result_expr_{std::move(expr)}, error_src_function_{src_fn}, error_description_{std::move(errmsg)}
|
||||
{}
|
||||
|
||||
/* Factory for a result of type parser_result_type::none:
 * null expression, null source-function name, empty error description.
 */
parser_result
|
||||
parser_result::none()
|
||||
{
|
||||
return parser_result(parser_result_type::none,
|
||||
nullptr /*expr*/,
|
||||
nullptr /*src_function*/,
|
||||
std::string() /*errmsg*/);
|
||||
}
|
||||
|
||||
/* Factory for a result of type parser_result_type::error.
 *
 * error_src_function  name of the function reporting the error;
 *                     the pointer is forwarded as-is (string is not copied)
 * errmsg              error description; moved into the result
 *
 * The expression slot of the returned result is null.
 */
parser_result
|
||||
parser_result::error(const char * error_src_function,
|
||||
std::string errmsg)
|
||||
{
|
||||
return parser_result(parser_result_type::error,
|
||||
nullptr /*expr*/,
|
||||
error_src_function,
|
||||
std::move(errmsg));
|
||||
}
|
||||
|
||||
/* Factory for a result of type parser_result_type::expression.
 *
 * expr  expression to carry; moved into the result
 *
 * The error fields of the returned result are left empty
 * (null source-function name, empty description).
 */
parser_result
|
||||
parser_result::expression(rp<Expression> expr)
|
||||
{
|
||||
return parser_result(parser_result_type::expression,
|
||||
std::move(expr),
|
||||
nullptr /*src_function*/,
|
||||
std::string() /*errmsg*/);
|
||||
}
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end parser_result.cpp */
|
||||
|
|
@ -156,12 +156,17 @@ namespace xo {
|
|||
->top_exprstate().on_rightbrace_token(tk, this);
|
||||
}
|
||||
|
||||
/* Report a parsing error through the result slot instead of throwing.
 *
 * Overwrites the object that p_result_ points at with
 * parser_result::error(self_name, errmsg).
 *
 * self_name  name of the function that detected the error
 * errmsg     error description; moved into the stored result
 *
 * p_result_ is dereferenced unconditionally, so it must be non-null.
 */
void
|
||||
parserstatemachine::on_error(const char * self_name, std::string errmsg)
|
||||
{
|
||||
*(this->p_result_) = parser_result::error(self_name, std::move(errmsg));
|
||||
}
|
||||
|
||||
void
|
||||
parserstatemachine::print(std::ostream & os) const {
|
||||
os << "<psm";
|
||||
os << xtag("stack", p_stack_);
|
||||
os << xtag("env_stack", p_env_stack_);
|
||||
os << xtag("emit_expr", p_emit_expr_);
|
||||
os << ">";
|
||||
}
|
||||
} /*namespace scm*/
|
||||
|
|
|
|||
|
|
@ -25,15 +25,11 @@ namespace xo {
|
|||
if (!pps->print_upto_tag("env_stack", x.p_env_stack_))
|
||||
return false;
|
||||
|
||||
if (!pps->print_upto_tag("emit_expr", (void*)x.p_emit_expr_))
|
||||
return false;
|
||||
|
||||
return pps->print_upto(">");
|
||||
} else {
|
||||
pps->write("<psm");
|
||||
pps->newline_pretty_tag(ppii.ci1(), "stack", x.p_stack_);
|
||||
pps->newline_pretty_tag(ppii.ci1(), "env_stack", x.p_env_stack_);
|
||||
pps->newline_pretty_tag(ppii.ci1(), "emit_expr", (void*)x.p_emit_expr_);
|
||||
pps->write(">");
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ namespace xo {
|
|||
/* each loop iteration reads one token */
|
||||
|
||||
/* read one token from input */
|
||||
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
|
||||
auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof);
|
||||
|
||||
log && log(xtag("consumed", used_span));
|
||||
log && log(xtag("input.pre", input));
|
||||
|
|
@ -53,45 +53,60 @@ namespace xo {
|
|||
|
||||
if (tk.is_valid()) {
|
||||
/* forward just-read token to parser */
|
||||
auto expr = this->parser_.include_token(tk);
|
||||
auto parser_result = this->parser_.include_token(tk);
|
||||
|
||||
if (expr) {
|
||||
if (parser_result.is_expression()) {
|
||||
log && log(xtag("outcome", "victory!"),
|
||||
xtag("expr", expr));
|
||||
xtag("expr", parser_result.result_expr()));
|
||||
|
||||
/* token completes an expression -> victory */
|
||||
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
|
||||
return reader_result(parser_result.result_expr(),
|
||||
expr_span, parser_.stack_size(), reader_error());
|
||||
} else if (parser_result.is_error()) {
|
||||
/* 1. parser detected error.
|
||||
* 2. tokenizer_.input_state() refers to position just after offending token
|
||||
* 3. error_pos here is 0 because error detected at token boundary
|
||||
*/
|
||||
reader_error error2(parser_result.error_src_function(),
|
||||
parser_result.error_description(),
|
||||
tokenizer_.input_state().rewind(tk.text().size()),
|
||||
0 /*error_pos*/);
|
||||
|
||||
std::cout << "parser error pre-report:" << std::endl;
|
||||
error2.report(std::cout);
|
||||
|
||||
return reader_result(nullptr, expr_span, parser_.stack_size(), error2);
|
||||
} else {
|
||||
/* token did not complete an expression
|
||||
* (e.g. token for '[')
|
||||
*
|
||||
* input span may conotain more tokens -> iterate
|
||||
* input span may contain more tokens -> iterate
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
if (error.is_error()) {
|
||||
if (error1.is_error()) {
|
||||
/* tokenizer detected an error */
|
||||
|
||||
std::cout << "tokenizer error pre-report:" << std::endl;
|
||||
error.report(std::cout);
|
||||
error1.report(std::cout);
|
||||
|
||||
return reader_result(nullptr, expr_span, parser_.stack_size(),
|
||||
reader_error(error.src_function(),
|
||||
error.error_description(),
|
||||
error.input_state(),
|
||||
error.error_pos()));
|
||||
reader_error(error1.src_function(),
|
||||
error1.error_description(),
|
||||
error1.input_state(),
|
||||
error1.error_pos()));
|
||||
} else {
|
||||
/* control should not come here */
|
||||
|
||||
assert(input.empty());
|
||||
}
|
||||
|
||||
/* ono more tokens in input */
|
||||
/* need more tokens in input */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* control here: either
|
||||
/* control here: either
|
||||
* 1. input.empty (perhaps ate some whitespace, ok)
|
||||
* 2. missing or incomplete token (ok unless eof)
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue