detailed parser error reporting [wip - 1 example]

This commit is contained in:
Roland Conybeare 2025-07-19 21:09:57 -05:00
commit 65a83cd77c
19 changed files with 256 additions and 84 deletions

View file

@ -29,12 +29,14 @@ namespace xo {
* 2. top-level batch
* 3. nested
*
* @text
* 1 2 3
* +--------
* def | y y y
* symbol | y n n 1: evaluate as variable
* i64 | y n n 1: evaluate as constant
*
* @endtext
**/
class exprseq_xs : public exprstate {
public:

View file

@ -189,10 +189,10 @@ namespace xo {
void illegal_input_error(const char * self_name,
const token_type & tk) const;
/** throw exception when unable to locate definition for a variable
**/
/** capture error in @p *p_psm when unable to locate definition for a variable **/
void unknown_variable_error(const char * self_name,
const token_type & tk) const;
const token_type & tk,
parserstatemachine * p_psm) const;
protected:
/** explicit subtype: identifies derived class **/

View file

@ -7,6 +7,7 @@
#include "exprstatestack.hpp"
#include "envframestack.hpp"
#include "parser_result.hpp"
#include <stdexcept>
namespace xo {
@ -210,7 +211,7 @@ namespace xo {
* @return parser result, stored in this parser: holds an expression if @p tk
* completes one, an error if one was reported, otherwise none
**/
rp<Expression> include_token(const token_type & tk);
const parser_result & include_token(const token_type & tk);
/** reset to starting parsing state.
* use this after encountering an error, to avoid cascade of
@ -240,6 +241,8 @@ namespace xo {
**/
envframestack env_stack_;
/** parser result state **/
parser_result result_;
}; /*parser*/
inline std::ostream &

View file

@ -0,0 +1,65 @@
/* file parser_result.hpp
*
* author: Roland Conybeare, Jul 2025
*/
#pragma once
#include "xo/expression/Expression.hpp"
namespace xo {
namespace scm {
/** Discriminator for the kind of outcome carried by a parser_result.
 *  Enumerator values are the consecutive integers starting at 0.
 **/
enum parser_result_type {
    /** nothing to report yet: expression is still incomplete **/
    none = 0,
    /** a complete expression is being emitted **/
    expression = 1,
    /** an error is being emitted **/
    error = 2
};
/** Value reporting one of three outcomes: nothing yet, an expression, or an error.
 *
 *  Which outcome applies is recorded in @ref result_type_; the remaining
 *  members are only meaningful for the matching outcome (see table on
 *  @ref result_type_). Instances are normally built with the static
 *  factories @ref none, @ref error and @ref expression.
 **/
struct parser_result {
using Expression = xo::ast::Expression;
public:
/** default state: @ref result_type_ is parser_result_type::none,
 *  @ref error_src_function_ is nullptr, @ref error_description_ is empty
 **/
parser_result() = default;
/** memberwise constructor.
 *  @p type     result discriminator
 *  @p expr     result expression
 *  @p src_fn   name of function reporting an error
 *  @p errmsg   error description
 **/
parser_result(parser_result_type type, rp<Expression> expr, const char * src_fn, std::string errmsg);
/** create result of type parser_result_type::none **/
static parser_result none();
/** create result of type parser_result_type::error.
 *  @p error_src_function  name of function reporting the error
 *  @p errmsg              error description
 **/
static parser_result error(const char * error_src_function,
std::string errmsg);
/** create result of type parser_result_type::expression, holding @p expr **/
static parser_result expression(rp<Expression> expr);
/** true iff @ref result_type_ is parser_result_type::none **/
bool is_none() const { return result_type_ == parser_result_type::none; }
/** true iff @ref result_type_ is parser_result_type::expression **/
bool is_expression() const { return result_type_ == parser_result_type::expression; }
/** true iff @ref result_type_ is parser_result_type::error **/
bool is_error() const { return result_type_ == parser_result_type::error; }
/* read-only access to members */
parser_result_type result_type() const { return result_type_; }
const rp<Expression> & result_expr() const { return result_expr_; }
const char * error_src_function() const { return error_src_function_; }
const std::string & error_description() const { return error_description_; }
public:
/** none|expression|error
 *
 * @text
 * result_type | error_src_function | error_description
 * -------------+--------------------+-------------------
 * none | nullptr | empty
 * expression | nullptr | empty
 * error | non-null | non-empty
 * @endtext
 **/
parser_result_type result_type_ = parser_result_type::none;
/** if @ref result_type_ is parser_result_type::expression -> non-null result expression **/
rp<Expression> result_expr_;
/** if @ref result_type_ is parser_result_type::error -> non-null source function **/
const char * error_src_function_ = nullptr;
/** if @ref result_type_ is parser_result_type::error -> non-empty error description **/
std::string error_description_;
};
} /*namespace scm*/
} /*namespace xo*/
/* end parser_result.hpp */

View file

@ -7,6 +7,7 @@
#include "exprstate.hpp"
#include "envframestack.hpp"
#include "parser_result.hpp"
namespace xo {
namespace scm {
@ -27,10 +28,17 @@ namespace xo {
public:
parserstatemachine(exprstatestack * p_stack,
envframestack * p_env_stack,
rp<Expression> * p_emit_expr)
parser_result * p_result)
: p_stack_{p_stack},
p_env_stack_{p_env_stack},
p_emit_expr_{p_emit_expr} {}
p_result_{p_result}
{}
//const parser_result & result() const { return result_; }
//parser_result_state result_state() const { return result_state_; }
//const rp<Expression> & result_expr() const { return result_expr_; }
//const char * error_src_function() const { return error_src_function_; }
//const std::string & error_description() const { return error_description_; }
std::unique_ptr<exprstate> pop_exprstate();
exprstate & top_exprstate();
@ -57,13 +65,19 @@ namespace xo {
void on_expr_with_semicolon(bp<Expression> expr);
void on_symbol(const std::string & symbol);
// ---- parsing inputs -----
// ----- parsing inputs -----
void on_semicolon_token(const token_type & tk);
void on_operator_token(const token_type & tk);
void on_leftbrace_token(const token_type & tk);
void on_rightbrace_token(const token_type & tk);
// ----- parsing error -----
/** @p self_name location (implementation function) where error detected
**/
void on_error(const char * self_name, std::string error_description);
/** write human-readable representation on @p os **/
void print(std::ostream & os) const;
@ -72,13 +86,11 @@ namespace xo {
* generally speaking, push when to start new work for nested content;
* pop when work complete
**/
exprstatestack * p_stack_;
exprstatestack * p_stack_ = nullptr;
/** stack of environment frames, one for each enclosing lambda **/
envframestack * p_env_stack_;
/** if non-null, store next non-nested complete expressions in
* *p_emit_expr
**/
rp<Expression> * p_emit_expr_;
envframestack * p_env_stack_ = nullptr;
/** parser result object **/
parser_result * p_result_ = nullptr;
};
inline std::ostream &

View file

@ -3,6 +3,8 @@
* Author: Roland Conybeare, Jul 2025
*/
#pragma once
#include "xo/tokenizer/tokenizer_error.hpp"
namespace xo {
@ -18,7 +20,7 @@ namespace xo {
* @
**/
reader_error(const char * src_function,
const char * error_description,
std::string error_description,
const input_state_type & input_state,
size_t error_pos)
: tk_error_{src_function, error_description, input_state, error_pos}

View file

@ -2,6 +2,7 @@
set(SELF_LIB xo_reader)
set(SELF_SRCS
parser_result.cpp
parser.cpp
parserstatemachine.cpp
reader.cpp

View file

@ -37,8 +37,6 @@ namespace xo {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
//constexpr const char * c_self_name = "exprseq_xs::on_def_token";
define_xs::start(p_psm);
/* keyword 'def' introduces a definition:
@ -68,7 +66,7 @@ namespace xo {
if (var.get()) {
progress_xs::start(var.promote(), p_psm);
} else {
this->unknown_variable_error(c_self_name, tk);
this->unknown_variable_error(c_self_name, tk, p_psm);
}
} else {
/* policy: don't allow variable references as toplevel expressions
@ -117,10 +115,9 @@ namespace xo {
* arbitrary number of expressions.
*/
auto p_emit_expr = p_psm->p_emit_expr_;
*p_emit_expr = expr.promote();
} /*on_expr*/
*(p_psm->p_result_) = parser_result::expression(expr.promote());
}
void
exprseq_xs::on_expr_with_semicolon(bp<Expression> expr,
@ -132,9 +129,7 @@ namespace xo {
* semicolons are sometimes mandatory to avoid ambiguity.
*/
auto p_emit_expr = p_psm->p_emit_expr_;
*p_emit_expr = expr.promote();
*(p_psm->p_result_) = parser_result::expression(expr.promote());
}
} /*namespace scm*/

View file

@ -466,12 +466,13 @@ namespace xo {
void
exprstate::unknown_variable_error(const char * self_name,
const token_type & tk) const
const token_type & tk,
parserstatemachine * p_psm) const
{
throw std::runtime_error
(tostr(self_name,
": unknown variable name",
xtag("var", tk.text())));
std::string errmsg = tostr("unknown variable name",
xtag("var", tk.text()));
p_psm->on_error(self_name, std::move(errmsg));
}
} /*namespace scm*/
} /*namespace xo*/

View file

@ -30,7 +30,7 @@ namespace xo {
// ----- parser -----
parser::parser()
: xs_stack_{}, env_stack_{}
: xs_stack_{}, env_stack_{}, result_{}
{
/* top-level environment. initially empty */
rp<LocalEnv> toplevel_env = LocalEnv::make_empty();
@ -46,25 +46,23 @@ namespace xo {
void
parser::begin_interactive_session() {
/* note: not using emit expr here */
parserstatemachine psm(&xs_stack_,
&env_stack_,
nullptr /*p_emit_expr*/);
&result_);
exprseq_xs::start(exprseqtype::toplevel_interactive, &psm);
}
void
parser::begin_translation_unit() {
/* note: not using emit expr here */
parserstatemachine psm(&xs_stack_,
&env_stack_,
nullptr /*p_emit_expr*/);
&result_);
exprseq_xs::start(exprseqtype::toplevel_batch, &psm);
}
rp<Expression>
const parser_result &
parser::include_token(const token_type & tk)
{
constexpr bool c_debug_flag = true;
@ -81,15 +79,11 @@ namespace xo {
log && log(xtag("top", xs_stack_.top_exprstate()));
rp<Expression> retval;
parserstatemachine psm(&xs_stack_, &env_stack_, &retval);
parserstatemachine psm(&xs_stack_, &env_stack_, &result_);
xs_stack_.top_exprstate().on_input(tk, &psm);
log && log(xtag("retval", retval));
return retval;
return result_;
} /*include_token*/
void
@ -97,6 +91,7 @@ namespace xo {
{
xs_stack_.reset_to_toplevel();
env_stack_.reset_to_toplevel();
result_ = parser_result::none();
} /*discard_current_state*/
void

View file

@ -0,0 +1,44 @@
/* file parser_result.cpp
*
* author: Roland Conybeare, Jul 2025
*/
#include "parser_result.hpp"
namespace xo {
namespace scm {
/* memberwise constructor: each argument initializes the corresponding member;
 * the by-value arguments (expr, errmsg) are moved into place.
 */
parser_result::parser_result(parser_result_type type,
                             rp<Expression> expr,
                             const char * src_fn,
                             std::string errmsg)
    : result_type_{type},
      result_expr_{std::move(expr)},
      error_src_function_{src_fn},
      error_description_{std::move(errmsg)}
{}
/* factory: result of type 'none' -- no expression, no error source, empty message */
parser_result
parser_result::none()
{
    parser_result nothing_yet(parser_result_type::none,
                              nullptr /*expr*/,
                              nullptr /*src_function*/,
                              std::string() /*errmsg*/);

    return nothing_yet;
}
/* factory: result of type 'error'.
 * error_src_function names the function reporting the error;
 * errmsg is the error description (moved into the result).
 * No expression is attached.
 */
parser_result
parser_result::error(const char * error_src_function,
                     std::string errmsg)
{
    parser_result failure(parser_result_type::error,
                          nullptr /*expr*/,
                          error_src_function,
                          std::move(errmsg));

    return failure;
}
/* factory: result of type 'expression' holding expr (moved into the result);
 * error source is null and error description is empty.
 */
parser_result
parser_result::expression(rp<Expression> expr)
{
    parser_result success(parser_result_type::expression,
                          std::move(expr),
                          nullptr /*src_function*/,
                          std::string() /*errmsg*/);

    return success;
}
} /*namespace scm*/
} /*namespace xo*/
/* end parser_result.cpp */

View file

@ -156,12 +156,17 @@ namespace xo {
->top_exprstate().on_rightbrace_token(tk, this);
}
void
parserstatemachine::on_error(const char * self_name, std::string errmsg)
{
*(this->p_result_) = parser_result::error(self_name, std::move(errmsg));
}
void
parserstatemachine::print(std::ostream & os) const {
os << "<psm";
os << xtag("stack", p_stack_);
os << xtag("env_stack", p_env_stack_);
os << xtag("emit_expr", p_emit_expr_);
os << ">";
}
} /*namespace scm*/

View file

@ -25,15 +25,11 @@ namespace xo {
if (!pps->print_upto_tag("env_stack", x.p_env_stack_))
return false;
if (!pps->print_upto_tag("emit_expr", (void*)x.p_emit_expr_))
return false;
return pps->print_upto(">");
} else {
pps->write("<psm");
pps->newline_pretty_tag(ppii.ci1(), "stack", x.p_stack_);
pps->newline_pretty_tag(ppii.ci1(), "env_stack", x.p_env_stack_);
pps->newline_pretty_tag(ppii.ci1(), "emit_expr", (void*)x.p_emit_expr_);
pps->write(">");
return false;

View file

@ -43,7 +43,7 @@ namespace xo {
/* each loop iteration reads one token */
/* read one token from input */
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof);
log && log(xtag("consumed", used_span));
log && log(xtag("input.pre", input));
@ -53,45 +53,60 @@ namespace xo {
if (tk.is_valid()) {
/* forward just-read token to parser */
auto expr = this->parser_.include_token(tk);
auto parser_result = this->parser_.include_token(tk);
if (expr) {
if (parser_result.is_expression()) {
log && log(xtag("outcome", "victory!"),
xtag("expr", expr));
xtag("expr", parser_result.result_expr()));
/* token completes an expression -> victory */
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
return reader_result(parser_result.result_expr(),
expr_span, parser_.stack_size(), reader_error());
} else if (parser_result.is_error()) {
/* 1. parser detected error.
* 2. tokenizer_.input_state() refers to position just after offending token
* 3. error_pos here is 0 because error detected at token boundary
*/
reader_error error2(parser_result.error_src_function(),
parser_result.error_description(),
tokenizer_.input_state().rewind(tk.text().size()),
0 /*error_pos*/);
std::cout << "parser error pre-report:" << std::endl;
error2.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(), error2);
} else {
/* token did not complete an expression
* (e.g. token for '[')
*
* input span may conotain more tokens -> iterate
* input span may contain more tokens -> iterate
*/
}
} else {
if (error.is_error()) {
if (error1.is_error()) {
/* tokenizer detected an error */
std::cout << "tokenizer error pre-report:" << std::endl;
error.report(std::cout);
error1.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(),
reader_error(error.src_function(),
error.error_description(),
error.input_state(),
error.error_pos()));
reader_error(error1.src_function(),
error1.error_description(),
error1.input_state(),
error1.error_pos()));
} else {
/* control should not come here */
assert(input.empty());
}
/* ono more tokens in input */
/* need more tokens in input */
break;
}
}
/* control here: either
/* control here: either
* 1. input.empty (perhaps ate some whitespace, ok)
* 2. missing or incomplete token (ok unless eof)
*/

View file

@ -37,7 +37,11 @@ namespace xo {
*/
{
auto r1 = parser.include_token(token_type::def());
REQUIRE(r1.get() == nullptr);
REQUIRE(r1.is_none());
REQUIRE(r1.result_expr().get() == nullptr);
REQUIRE(r1.error_src_function() == nullptr);
REQUIRE(r1.error_description().empty());
/* stack should be:
*
@ -69,7 +73,10 @@ namespace xo {
cerr << "parser state after [def foo]" << endl;
cerr << parser << endl;
REQUIRE(r2.get() == nullptr);
REQUIRE(r2.is_none());
REQUIRE(r2.result_expr().get() == nullptr);
REQUIRE(r2.error_src_function() == nullptr);
REQUIRE(r2.error_description().empty());
/* stack should be:
*
@ -102,7 +109,10 @@ namespace xo {
cerr << "parser state after [def foo :]" << endl;
cerr << parser << endl;
REQUIRE(r3.get() == nullptr);
REQUIRE(r3.is_none());
REQUIRE(r3.result_expr().get() == nullptr);
REQUIRE(r3.error_src_function() == nullptr);
REQUIRE(r3.error_description().empty());
/* stack should be:
*
@ -134,7 +144,10 @@ namespace xo {
cerr << "parser state after [def foo : f64]" << endl;
cerr << parser << endl;
REQUIRE(r4.get() == nullptr);
REQUIRE(r4.is_none());
REQUIRE(r4.result_expr().get() == nullptr);
REQUIRE(r4.error_src_function() == nullptr);
REQUIRE(r4.error_description().empty());
CHECK(parser.stack_size() == 2);
@ -178,7 +191,10 @@ namespace xo {
cerr << "parser state after [def foo : f64 =]" << endl;
cerr << parser << endl;
REQUIRE(r5.get() == nullptr);
REQUIRE(r5.is_none());
REQUIRE(r5.result_expr().get() == nullptr);
REQUIRE(r5.error_src_function() == nullptr);
REQUIRE(r5.error_description().empty());
CHECK(parser.stack_size() == 3);
@ -219,7 +235,10 @@ namespace xo {
cerr << "parser state after [def foo : f64 = 3.14159265]" << endl;
cerr << parser << endl;
REQUIRE(r6.get() == nullptr);
REQUIRE(r6.is_none());
REQUIRE(r6.result_expr().get() == nullptr);
REQUIRE(r6.error_src_function() == nullptr);
REQUIRE(r6.error_description().empty());
/* stack should be
*
@ -258,7 +277,10 @@ namespace xo {
cerr << "parser state after [def foo : f64 = 3.14159265;]" << endl;
cerr << parser << endl;
REQUIRE(r7.get() != nullptr);
REQUIRE(r7.is_expression());
REQUIRE(r7.result_expr().get() != nullptr);
REQUIRE(r7.error_src_function() == nullptr);
REQUIRE(r7.error_description().empty());
CHECK(parser.stack_size() == 1);

View file

@ -70,7 +70,12 @@ namespace xo {
/** @defgroup input-state-general-methods **/
///@{
/** capture prefix of @p input up to first newline **/
/** Input state less @p n chars.
* Use to recover input state before a complete but error-triggering token
**/
input_state rewind(std::size_t n) const;
/** Capture prefix of @p input up to first newline **/
void capture_current_line(const span_type & input);
/** Reset input state for start of next line.
@ -128,6 +133,14 @@ namespace xo {
return false;
}
/* New input state built from this state's current line, with position moved
 * back by n characters (clamped at 0) and whitespace set to 0.
 * This state is not modified.
 */
template <typename CharT>
input_state<CharT>
input_state<CharT>::rewind(std::size_t n) const {
    /* clamp: don't move back past position 0 */
    auto rewound_pos = (n <= current_pos_) ? current_pos_ - n : 0;

    return input_state<CharT>(this->current_line_,
                              rewound_pos,
                              0 /*whitespace*/);
}
template <typename CharT>
void
input_state<CharT>::consume(size_t z) {

View file

@ -70,6 +70,16 @@ namespace xo {
///@}
/** @defgroup tokenizer-access-methods tokenizer access methods **/
///@{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchanges-meaning"
const input_state<CharT> & input_state() const { return input_state_; }
#pragma GCC diagnostic pop
///@}
/** @defgroup tokenizer-general-methods tokenizer methods **/
///@{
@ -355,9 +365,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"improperly placed sign indicator",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)
));
}
@ -367,9 +374,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"duplicate decimal point in numeric literal",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)));
}
@ -380,9 +384,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"duplicate exponent marker in numeric literal",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)));
}

View file

@ -34,11 +34,11 @@ namespace xo {
* @p error_pos error location relative to token start
**/
tokenizer_error(const char * src_function,
const char * error_description,
std::string error_description,
const input_state_type & input_state,
size_t error_pos)
: src_function_{src_function},
error_description_{error_description},
error_description_{std::move(error_description)},
input_state_{input_state},
error_pos_{error_pos}
{
@ -53,7 +53,7 @@ namespace xo {
///@{
const char * src_function() const { return src_function_; }
const char * error_description() const { return error_description_; }
const std::string & error_description() const { return error_description_; }
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchanges-meaning"
const input_state_type & input_state() const { return input_state_; }
@ -68,9 +68,9 @@ namespace xo {
///@{
/** true, except for a sentinel error object **/
bool is_error() const { return error_description_ != nullptr; }
bool is_error() const { return !error_description_.empty(); }
/** false except for object in sentinel state **/
bool is_not_an_error() const { return error_description_ == nullptr; }
bool is_not_an_error() const { return error_description_.empty(); }
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
* For Schematika errors prefer @ref report
@ -89,7 +89,7 @@ namespace xo {
/** source location (in tokenizer) at which error identified **/
char const * src_function_ = nullptr;
/** static error description **/
char const * error_description_ = nullptr;
std::string error_description_;
/** input state associated with this error.
* Sufficient to precisely locate it with context.
**/
@ -117,7 +117,7 @@ namespace xo {
tokenizer_error<CharT>::report(std::ostream & os) const {
using namespace std;
if (error_description_) {
if (!error_description_.empty()) {
const char * prefix = "input: ";
/* input_state.current_pos: position of first character following preceding token.
* input_state.whitespace: whitespace between current_pos and start of failing token

View file

@ -555,7 +555,7 @@ namespace xo {
if (sr.error().src_function()) {
REHEARSE(rh, std::string(sr.error().src_function()) == std::string(testcase.expect_error_.src_function()));
}
if (sr.error().error_description()) {
if (!sr.error().error_description().empty()) {
REHEARSE(rh, std::string(sr.error().error_description()) == std::string(testcase.expect_error_.error_description()));
}
REHEARSE(rh, sr.error().whitespace() == testcase.expect_error_.whitespace());