reader reports tokenizer errors through normal return
This commit is contained in:
parent
2e0846823b
commit
8c36bbce28
14 changed files with 174 additions and 30 deletions
|
|
@ -61,10 +61,15 @@ main() {
|
||||||
input = span_type::from_string(input_str);
|
input = span_type::from_string(input_str);
|
||||||
|
|
||||||
while (!input.empty()) {
|
while (!input.empty()) {
|
||||||
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
|
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
|
||||||
|
|
||||||
if (expr) {
|
if (expr) {
|
||||||
cout << expr << endl;
|
cout << expr << endl;
|
||||||
|
} else if (error.is_error()) {
|
||||||
|
cout << "parsing error: " << endl;
|
||||||
|
error.report(cout);
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
input = input.after_prefix(consumed);
|
input = input.after_prefix(consumed);
|
||||||
|
|
@ -72,9 +77,12 @@ main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
|
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
|
||||||
|
|
||||||
if (expr) {
|
if (expr) {
|
||||||
cout << expr << endl;
|
cout << expr << endl;
|
||||||
|
} else if (error.is_error()) {
|
||||||
|
cout << "parsing error: " << endl;
|
||||||
|
error.report(cout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -93,27 +93,41 @@ main() {
|
||||||
input = span_type::from_string(input_str);
|
input = span_type::from_string(input_str);
|
||||||
|
|
||||||
while (!input.empty()) {
|
while (!input.empty()) {
|
||||||
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
|
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
|
||||||
|
|
||||||
if (expr) {
|
if (expr) {
|
||||||
ppconfig ppc;
|
ppconfig ppc;
|
||||||
ppstate_standalone pps(&cout, 0, &ppc);
|
ppstate_standalone pps(&cout, 0, &ppc);
|
||||||
|
|
||||||
pps.prettyn(expr);
|
pps.prettyn(expr);
|
||||||
|
} else if (error.is_error()) {
|
||||||
|
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
|
||||||
|
error.report(cout);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
input = input.after_prefix(consumed);
|
input = input.after_prefix(consumed);
|
||||||
parser_stack_size = psz;
|
parser_stack_size = psz;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* here: input.empty() or error encountered */
|
||||||
|
|
||||||
|
/* discard stashed remainder of input line
|
||||||
|
* (for nicely-formatted errors)
|
||||||
|
*/
|
||||||
|
rdr.reset_to_idle_toplevel();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
|
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
|
||||||
|
|
||||||
if (expr) {
|
if (expr) {
|
||||||
ppconfig ppc;
|
ppconfig ppc;
|
||||||
ppstate_standalone pps(&cout, 0, &ppc);
|
ppstate_standalone pps(&cout, 0, &ppc);
|
||||||
|
|
||||||
pps.prettyn<rp<Expression>>(rp<Expression>(expr));
|
pps.prettyn<rp<Expression>>(rp<Expression>(expr));
|
||||||
|
} else if (error.is_error()) {
|
||||||
|
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
|
||||||
|
error.report(cout);
|
||||||
}
|
}
|
||||||
|
|
||||||
rx.history_save("repl_history.txt");
|
rx.history_save("repl_history.txt");
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,8 @@ namespace xo {
|
||||||
void push_envframe(const rp<LocalEnv> & x);
|
void push_envframe(const rp<LocalEnv> & x);
|
||||||
rp<LocalEnv> pop_envframe();
|
rp<LocalEnv> pop_envframe();
|
||||||
|
|
||||||
|
void reset_to_toplevel() { stack_.resize(1); }
|
||||||
|
|
||||||
/** relative to top-of-stack.
|
/** relative to top-of-stack.
|
||||||
* 0 -> top (last in), z-1 -> bottom (first in)
|
* 0 -> top (last in), z-1 -> bottom (first in)
|
||||||
**/
|
**/
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,8 @@ namespace xo {
|
||||||
void push_exprstate(std::unique_ptr<exprstate> exs);
|
void push_exprstate(std::unique_ptr<exprstate> exs);
|
||||||
std::unique_ptr<exprstate> pop_exprstate();
|
std::unique_ptr<exprstate> pop_exprstate();
|
||||||
|
|
||||||
|
void reset_to_toplevel();
|
||||||
|
|
||||||
/** relative to top-of-stack.
|
/** relative to top-of-stack.
|
||||||
* 0 -> top (last in), z-1 -> bottom (first in)
|
* 0 -> top (last in), z-1 -> bottom (first in)
|
||||||
**/
|
**/
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,8 @@ namespace xo {
|
||||||
* $varname(n) : $typename(n)) [-> $typename[ret]]
|
* $varname(n) : $typename(n)) [-> $typename[ret]]
|
||||||
* body-expr
|
* body-expr
|
||||||
* [ end $functionname ]
|
* [ end $functionname ]
|
||||||
* literal-expr = integer-literal
|
* literal-expr = boolean-literal
|
||||||
|
* | integer-literal
|
||||||
* | fp-literal
|
* | fp-literal
|
||||||
* | string-literal
|
* | string-literal
|
||||||
* | symbol-literal
|
* | symbol-literal
|
||||||
|
|
@ -211,6 +212,13 @@ namespace xo {
|
||||||
**/
|
**/
|
||||||
rp<Expression> include_token(const token_type & tk);
|
rp<Expression> include_token(const token_type & tk);
|
||||||
|
|
||||||
|
/** reset to starting parsing state.
|
||||||
|
* use this after encountering an error, to avoid cascade of
|
||||||
|
* spurious secondary errors.. particularly important when
|
||||||
|
* invoked as part of a REPL.
|
||||||
|
**/
|
||||||
|
void reset_to_idle_toplevel();
|
||||||
|
|
||||||
/** print human-readable representation on stream @p os **/
|
/** print human-readable representation on stream @p os **/
|
||||||
void print(std::ostream & os) const;
|
void print(std::ostream & os) const;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
#include "reader_error.hpp"
|
||||||
#include "xo/expression/Expression.hpp"
|
#include "xo/expression/Expression.hpp"
|
||||||
#include "xo/expression/pretty_expression.hpp"
|
#include "xo/expression/pretty_expression.hpp"
|
||||||
#include "xo/tokenizer/tokenizer.hpp"
|
#include "xo/tokenizer/tokenizer.hpp"
|
||||||
|
|
@ -19,8 +20,8 @@ namespace xo {
|
||||||
using Expression = xo::ast::Expression;
|
using Expression = xo::ast::Expression;
|
||||||
using span_type = span<const char>;
|
using span_type = span<const char>;
|
||||||
|
|
||||||
reader_result(rp<Expression> expr, span_type rem, std::size_t psz)
|
reader_result(rp<Expression> expr, span_type rem, std::size_t psz, const reader_error & error)
|
||||||
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz} {}
|
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz}, error_{error} {}
|
||||||
|
|
||||||
/** true if reader parsed a complete expression **/
|
/** true if reader parsed a complete expression **/
|
||||||
bool expr_complete() const { return expr_.get(); }
|
bool expr_complete() const { return expr_.get(); }
|
||||||
|
|
@ -37,6 +38,9 @@ namespace xo {
|
||||||
* will be zero whenever @ref expr_ is non-null
|
* will be zero whenever @ref expr_ is non-null
|
||||||
**/
|
**/
|
||||||
std::size_t parser_stack_size_ = 0;
|
std::size_t parser_stack_size_ = 0;
|
||||||
|
|
||||||
|
/** error description, whenever .error_.is_error() is true **/
|
||||||
|
reader_error error_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -53,7 +57,7 @@ namespace xo {
|
||||||
*
|
*
|
||||||
* for (auto rem = input; !rem.empty();) {
|
* for (auto rem = input; !rem.empty();) {
|
||||||
* // res: (parsed-expr, used)
|
* // res: (parsed-expr, used)
|
||||||
* auto res = rdr.read_expr(rem, eof);
|
* auto res = rdr.read_expr(rem, eof);
|
||||||
*
|
*
|
||||||
* if (res.first) {
|
* if (res.first) {
|
||||||
* // do something with res.first (parsed expr)
|
* // do something with res.first (parsed expr)
|
||||||
|
|
@ -112,6 +116,13 @@ namespace xo {
|
||||||
**/
|
**/
|
||||||
reader_result read_expr(const span_type & input, bool eof);
|
reader_result read_expr(const span_type & input, bool eof);
|
||||||
|
|
||||||
|
/** reset to known starting point after encountering an error.
|
||||||
|
* - remainder of stashed current line.
|
||||||
|
* Necessary for well-formatted error reporting.
|
||||||
|
* - current parsing state
|
||||||
|
**/
|
||||||
|
void reset_to_idle_toplevel();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** tokenizer: text -> tokens **/
|
/** tokenizer: text -> tokens **/
|
||||||
tokenizer_type tokenizer_;
|
tokenizer_type tokenizer_;
|
||||||
|
|
|
||||||
53
xo-reader/include/xo/reader/reader_error.hpp
Normal file
53
xo-reader/include/xo/reader/reader_error.hpp
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* reader_error.hpp
|
||||||
|
*
|
||||||
|
* Author: Roland Conybeare, Jul 2025
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xo/tokenizer/tokenizer_error.hpp"
|
||||||
|
|
||||||
|
namespace xo {
|
||||||
|
namespace scm {
|
||||||
|
class reader_error {
|
||||||
|
public:
|
||||||
|
using input_state_type = typename tokenizer_error<char>::input_state_type;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/** default ctor represents a not-an-error sentinel object **/
|
||||||
|
reader_error() = default;
|
||||||
|
/** construct to capture parsing error context
|
||||||
|
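* @p src_function, @p error_description, @p input_state and @p error_pos are forwarded to the wrapped tokenizer_error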
|
||||||
|
**/
|
||||||
|
reader_error(const char * src_function,
|
||||||
|
const char * error_description,
|
||||||
|
const input_state_type & input_state,
|
||||||
|
size_t error_pos)
|
||||||
|
: tk_error_{src_function, error_description, input_state, error_pos}
|
||||||
|
{}
|
||||||
|
|
||||||
|
const tokenizer_error<char> & tk_error() const { return tk_error_; }
|
||||||
|
|
||||||
|
/** true, except for sentinel not-an-error object **/
|
||||||
|
bool is_error() const { return tk_error_.is_error(); }
|
||||||
|
/** false, except for object in sentinel state **/
|
||||||
|
bool is_not_an_error() const { return tk_error_.is_not_an_error(); }
|
||||||
|
|
||||||
|
const char * src_function() const { return tk_error_.src_function(); }
|
||||||
|
|
||||||
|
/** print error representation to stream @p os. Intended for parser/tokenizer
|
||||||
|
* diagnostics. For Schematika errors prefer @ref report
|
||||||
|
**/
|
||||||
|
void print(std::ostream & os) const { tk_error_.print(os); }
|
||||||
|
|
||||||
|
/** print human-oriented error report on @p os. **/
|
||||||
|
void report(std::ostream & os) const { tk_error_.report(os); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
/** for parser-level errors, will still use this for
|
||||||
|
* {src function, error description, input state and error pos}
|
||||||
|
**/
|
||||||
|
tokenizer_error<char> tk_error_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* end reader_error.hpp */
|
||||||
|
|
@ -20,6 +20,11 @@ namespace xo {
|
||||||
return *(stack_[z-1]);
|
return *(stack_[z-1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
exprstatestack::reset_to_toplevel() {
|
||||||
|
this->stack_.resize(1);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) {
|
exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) {
|
||||||
constexpr bool c_debug_flag = true;
|
constexpr bool c_debug_flag = true;
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,13 @@ namespace xo {
|
||||||
return retval;
|
return retval;
|
||||||
} /*include_token*/
|
} /*include_token*/
|
||||||
|
|
||||||
|
void
|
||||||
|
parser::reset_to_idle_toplevel()
|
||||||
|
{
|
||||||
|
xs_stack_.reset_to_toplevel();
|
||||||
|
env_stack_.reset_to_toplevel();
|
||||||
|
} /*reset_to_idle_toplevel*/
|
||||||
|
|
||||||
void
|
void
|
||||||
parser::print(std::ostream & os) const {
|
parser::print(std::ostream & os) const {
|
||||||
os << "<parser"
|
os << "<parser"
|
||||||
|
|
|
||||||
|
|
@ -40,18 +40,15 @@ namespace xo {
|
||||||
span_type expr_span = input.prefix(0ul);
|
span_type expr_span = input.prefix(0ul);
|
||||||
|
|
||||||
while (!input.empty()) {
|
while (!input.empty()) {
|
||||||
|
/* each loop iteration reads one token */
|
||||||
|
|
||||||
/* read one token from input */
|
/* read one token from input */
|
||||||
auto sr = this->tokenizer_.scan2(input, eof);
|
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
|
||||||
const auto & tk = sr.get_token();
|
|
||||||
const span_type & used_span = sr.consumed();
|
|
||||||
|
|
||||||
log && log(xtag("consumed", used_span));
|
log && log(xtag("consumed", used_span));
|
||||||
log && log(xtag("input.pre", input));
|
log && log(xtag("input.pre", input));
|
||||||
|
|
||||||
input = input.after_prefix(used_span);
|
input = this->tokenizer_.consume(used_span, input);
|
||||||
|
|
||||||
log && log(xtag("expr_span.pre", expr_span));
|
|
||||||
|
|
||||||
expr_span += used_span;
|
expr_span += used_span;
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
if (tk.is_valid()) {
|
||||||
|
|
@ -63,18 +60,33 @@ namespace xo {
|
||||||
xtag("expr", expr));
|
xtag("expr", expr));
|
||||||
|
|
||||||
/* token completes an expression -> victory */
|
/* token completes an expression -> victory */
|
||||||
return reader_result(expr, expr_span, parser_.stack_size());
|
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
|
||||||
} else {
|
} else {
|
||||||
/* token did not complete an expression
|
/* token did not complete an expression
|
||||||
* (e.g. token for '[')
|
* (e.g. token for '[')
|
||||||
*
|
*
|
||||||
* input span may contain more tokens -> iterate
|
* input span may contain more tokens -> iterate
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(input.empty());
|
if (error.is_error()) {
|
||||||
|
/* tokenizer detected an error */
|
||||||
|
|
||||||
/* no more tokens in input */
|
std::cout << "tokenizer error pre-report:" << std::endl;
|
||||||
|
error.report(std::cout);
|
||||||
|
|
||||||
|
return reader_result(nullptr, expr_span, parser_.stack_size(),
|
||||||
|
reader_error(error.src_function(),
|
||||||
|
error.error_description(),
|
||||||
|
error.input_state(),
|
||||||
|
error.error_pos()));
|
||||||
|
} else {
|
||||||
|
/* control should not come here */
|
||||||
|
|
||||||
|
assert(input.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* no more tokens in input */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -99,7 +111,14 @@ namespace xo {
|
||||||
|
|
||||||
log && log(xtag("outcome", "noop"));
|
log && log(xtag("outcome", "noop"));
|
||||||
|
|
||||||
return reader_result(nullptr, expr_span, parser_.stack_size());
|
return reader_result(nullptr, expr_span, parser_.stack_size(), reader_error());
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
reader::reset_to_idle_toplevel()
|
||||||
|
{
|
||||||
|
this->tokenizer_.discard_current_line();
|
||||||
|
this->parser_.reset_to_idle_toplevel();
|
||||||
}
|
}
|
||||||
|
|
||||||
} /*namespace scm*/
|
} /*namespace scm*/
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ main() {
|
||||||
if (tk.is_valid()) {
|
if (tk.is_valid()) {
|
||||||
cout << tk << endl;
|
cout << tk << endl;
|
||||||
} else if (error.is_error()) {
|
} else if (error.is_error()) {
|
||||||
cout << "parsing error: " << endl;
|
cout << "tokenizer error: " << endl;
|
||||||
error.report(cout);
|
error.report(cout);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,18 @@ namespace xo {
|
||||||
/** @defgroup span-general-methods **/
|
/** @defgroup span-general-methods **/
|
||||||
///@{
|
///@{
|
||||||
|
|
||||||
|
/** @brief strip prefix until first occurrence of '\n', including the newline **/
|
||||||
|
void discard_until_newline() {
|
||||||
|
for (const CharT * p = lo_; p < hi_; ++p) {
|
||||||
|
if (*p == '\n') {
|
||||||
|
lo_ = p + 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lo_ = hi_;
|
||||||
|
}
|
||||||
|
|
||||||
/** Create new span over supplied type,
|
/** Create new span over supplied type,
|
||||||
* with identical (possibly misaligned) endpoints.
|
* with identical (possibly misaligned) endpoints.
|
||||||
*
|
*
|
||||||
|
|
@ -142,8 +154,7 @@ namespace xo {
|
||||||
|
|
||||||
/** @brief create span with @p prefix of this span removed **/
|
/** @brief create span with @p prefix of this span removed **/
|
||||||
span after_prefix(const span & prefix) const {
|
span after_prefix(const span & prefix) const {
|
||||||
assert(prefix.lo() == lo_);
|
if (!prefix.is_null() && (prefix.lo() != lo_)) {
|
||||||
if (prefix.lo() != lo_) {
|
|
||||||
throw std::runtime_error
|
throw std::runtime_error
|
||||||
("after_prefix: expected prefix of this span");
|
("after_prefix: expected prefix of this span");
|
||||||
}
|
}
|
||||||
|
|
@ -174,7 +185,7 @@ namespace xo {
|
||||||
span & operator+=(const span & x) {
|
span & operator+=(const span & x) {
|
||||||
if (hi_ == x.lo_) {
|
if (hi_ == x.lo_) {
|
||||||
hi_ = x.hi_;
|
hi_ = x.hi_;
|
||||||
} else {
|
} else if (!x.is_null()) {
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,12 +31,16 @@ namespace xo {
|
||||||
* span_type input = ...;
|
* span_type input = ...;
|
||||||
*
|
*
|
||||||
* while (!input.empty()) {
|
* while (!input.empty()) {
|
||||||
* auto res = tkz.scan(input);
|
* auto [tk, consumed, error] = tkz.scan(input);
|
||||||
* auto [tk, consumed, error] = res.first;
|
|
||||||
*
|
*
|
||||||
* // do something with tk if tk.is_valid()
|
* if (tk.is_valid()) {
|
||||||
|
* // do something with tk
|
||||||
|
* } else if (error.is_error()) {
|
||||||
|
* error.report(cout);
|
||||||
|
* break;
|
||||||
|
* }
|
||||||
*
|
*
|
||||||
* input = tkz.consume(res.second, input);
|
* input = tkz.consume(consumed, input);
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
* if endofinput {
|
* if endofinput {
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ namespace xo {
|
||||||
/** @defgroup tokenizer-error-ctors **/
|
/** @defgroup tokenizer-error-ctors **/
|
||||||
///@{
|
///@{
|
||||||
|
|
||||||
/** Default ctor represent a not-an-error sentinel object **/
|
/** Default ctor represents a not-an-error sentinel object **/
|
||||||
tokenizer_error() = default;
|
tokenizer_error() = default;
|
||||||
/** Constructor to capture parsing error context
|
/** Constructor to capture parsing error context
|
||||||
* @p tk_start current position on entry to scanner
|
* @p tk_start current position on entry to scanner
|
||||||
|
|
@ -69,7 +69,7 @@ namespace xo {
|
||||||
|
|
||||||
/** true, except for a sentinel error object **/
|
/** true, except for a sentinel error object **/
|
||||||
bool is_error() const { return error_description_ != nullptr; }
|
bool is_error() const { return error_description_ != nullptr; }
|
||||||
/** true except for object in sentinel state **/
|
/** false except for object in sentinel state **/
|
||||||
bool is_not_an_error() const { return error_description_ == nullptr; }
|
bool is_not_an_error() const { return error_description_ == nullptr; }
|
||||||
|
|
||||||
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
|
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue