reader reports tokenizer errors through normal return
This commit is contained in:
parent
2e0846823b
commit
8c36bbce28
14 changed files with 174 additions and 30 deletions
|
|
@ -61,10 +61,15 @@ main() {
|
|||
input = span_type::from_string(input_str);
|
||||
|
||||
while (!input.empty()) {
|
||||
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
|
||||
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
|
||||
|
||||
if (expr) {
|
||||
cout << expr << endl;
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error: " << endl;
|
||||
error.report(cout);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
input = input.after_prefix(consumed);
|
||||
|
|
@ -72,9 +77,12 @@ main() {
|
|||
}
|
||||
}
|
||||
|
||||
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
|
||||
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
|
||||
|
||||
if (expr) {
|
||||
cout << expr << endl;
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error: " << endl;
|
||||
error.report(cout);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,27 +93,41 @@ main() {
|
|||
input = span_type::from_string(input_str);
|
||||
|
||||
while (!input.empty()) {
|
||||
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
|
||||
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
|
||||
|
||||
if (expr) {
|
||||
ppconfig ppc;
|
||||
ppstate_standalone pps(&cout, 0, &ppc);
|
||||
|
||||
pps.prettyn(expr);
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
|
||||
error.report(cout);
|
||||
break;
|
||||
}
|
||||
|
||||
input = input.after_prefix(consumed);
|
||||
parser_stack_size = psz;
|
||||
}
|
||||
|
||||
/* here: input.empty() or error encountered */
|
||||
|
||||
/* discard stashed remainder of input line
|
||||
* (for nicely-formatted errors)
|
||||
*/
|
||||
rdr.reset_to_idle_toplevel();
|
||||
}
|
||||
|
||||
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
|
||||
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
|
||||
|
||||
if (expr) {
|
||||
ppconfig ppc;
|
||||
ppstate_standalone pps(&cout, 0, &ppc);
|
||||
|
||||
pps.prettyn<rp<Expression>>(rp<Expression>(expr));
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
|
||||
error.report(cout);
|
||||
}
|
||||
|
||||
rx.history_save("repl_history.txt");
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ namespace xo {
|
|||
void push_envframe(const rp<LocalEnv> & x);
|
||||
rp<LocalEnv> pop_envframe();
|
||||
|
||||
void reset_to_toplevel() { stack_.resize(1); }
|
||||
|
||||
/** relative to top-of-stack.
|
||||
* 0 -> top (last in), z-1 -> bottom (first in)
|
||||
**/
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ namespace xo {
|
|||
void push_exprstate(std::unique_ptr<exprstate> exs);
|
||||
std::unique_ptr<exprstate> pop_exprstate();
|
||||
|
||||
void reset_to_toplevel();
|
||||
|
||||
/** relative to top-of-stack.
|
||||
* 0 -> top (last in), z-1 -> bottom (first in)
|
||||
**/
|
||||
|
|
|
|||
|
|
@ -104,7 +104,8 @@ namespace xo {
|
|||
* $varname(n) : $typename(n)) [-> $typename[ret]]
|
||||
* body-expr
|
||||
* [ end $functionname ]
|
||||
* literal-expr = integer-literal
|
||||
* literal-expr = boolean-literal
|
||||
* | integer-literal
|
||||
* | fp-literal
|
||||
* | string-literal
|
||||
* | symbol-literal
|
||||
|
|
@ -211,6 +212,13 @@ namespace xo {
|
|||
**/
|
||||
rp<Expression> include_token(const token_type & tk);
|
||||
|
||||
/** reset to starting parsing state.
|
||||
* use this after encountering an error, to avoid cascade of
|
||||
* spurious secondary errors.. particularly important when
|
||||
* invoked as part of a REPL.
|
||||
**/
|
||||
void reset_to_idle_toplevel();
|
||||
|
||||
/** print human-readable representation on stream @p os **/
|
||||
void print(std::ostream & os) const;
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "parser.hpp"
|
||||
#include "reader_error.hpp"
|
||||
#include "xo/expression/Expression.hpp"
|
||||
#include "xo/expression/pretty_expression.hpp"
|
||||
#include "xo/tokenizer/tokenizer.hpp"
|
||||
|
|
@ -19,8 +20,8 @@ namespace xo {
|
|||
using Expression = xo::ast::Expression;
|
||||
using span_type = span<const char>;
|
||||
|
||||
reader_result(rp<Expression> expr, span_type rem, std::size_t psz)
|
||||
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz} {}
|
||||
reader_result(rp<Expression> expr, span_type rem, std::size_t psz, const reader_error & error)
|
||||
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz}, error_{error} {}
|
||||
|
||||
/** true if reader parsed a complete expression **/
|
||||
bool expr_complete() const { return expr_.get(); }
|
||||
|
|
@ -37,6 +38,9 @@ namespace xo {
|
|||
* will be zero whenever @ref expr_ is non-null
|
||||
**/
|
||||
std::size_t parser_stack_size_ = 0;
|
||||
|
||||
/** error description, whenever .error_.is_error() is true **/
|
||||
reader_error error_;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -53,7 +57,7 @@ namespace xo {
|
|||
*
|
||||
* for (auto rem = input; !rem.empty();) {
|
||||
* // res: (parsed-expr, used)
|
||||
* auto res = rdr.read_expr(rem, eof);
|
||||
* auto res = rdr.read_expr(rem, eof);
|
||||
*
|
||||
* if (res.first) {
|
||||
* // do something with res.first (parsed expr)
|
||||
|
|
@ -112,6 +116,13 @@ namespace xo {
|
|||
**/
|
||||
reader_result read_expr(const span_type & input, bool eof);
|
||||
|
||||
/** reset to known starting point after encountering an error.
|
||||
* - remainder of stashed current line.
|
||||
* Necessary for well-formatted error reporting.
|
||||
* - current parsing state
|
||||
**/
|
||||
void reset_to_idle_toplevel();
|
||||
|
||||
private:
|
||||
/** tokenizer: text -> tokens **/
|
||||
tokenizer_type tokenizer_;
|
||||
|
|
|
|||
53
xo-reader/include/xo/reader/reader_error.hpp
Normal file
53
xo-reader/include/xo/reader/reader_error.hpp
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
/* reader_error.hpp
|
||||
*
|
||||
* Author: Roland Conybeare, Jul 2025
|
||||
*/
|
||||
|
||||
#pragma once

#include "xo/tokenizer/tokenizer_error.hpp"
#include <cstddef>
#include <ostream>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
class reader_error {
|
||||
public:
|
||||
using input_state_type = typename tokenizer_error<char>::input_state_type;
|
||||
|
||||
public:
|
||||
/** default ctor represents a not-an-error sentinel object **/
|
||||
reader_error() = default;
|
||||
/** construct to capture parsing error context
|
||||
* @
|
||||
**/
|
||||
reader_error(const char * src_function,
|
||||
const char * error_description,
|
||||
const input_state_type & input_state,
|
||||
size_t error_pos)
|
||||
: tk_error_{src_function, error_description, input_state, error_pos}
|
||||
{}
|
||||
|
||||
const tokenizer_error<char> & tk_error() const { return tk_error_; }
|
||||
|
||||
/** true, except for sentinel not-an-error object **/
|
||||
bool is_error() const { return tk_error_.is_error(); }
|
||||
/** false, except for object in sentinel state **/
|
||||
bool is_not_an_error() const { return tk_error_.is_not_an_error(); }
|
||||
|
||||
const char * src_function() const { return tk_error_.src_function(); }
|
||||
|
||||
/** print error representation to stream @p os. Intended for parser/tokenizer
|
||||
* diagnostics. For Schematika errors prefer @ref report
|
||||
**/
|
||||
void print(std::ostream & os) const { tk_error_.print(os); }
|
||||
|
||||
/** print human-oriented error report on @p os. **/
|
||||
void report(std::ostream & os) const { tk_error_.report(os); }
|
||||
|
||||
private:
|
||||
/** for parser-level errors, will still use this for
|
||||
* {src function, error description, input state and error pos}
|
||||
**/
|
||||
tokenizer_error<char> tk_error_;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/* end reader_error.hpp */
|
||||
|
|
@ -20,6 +20,11 @@ namespace xo {
|
|||
return *(stack_[z-1]);
|
||||
}
|
||||
|
||||
void
|
||||
exprstatestack::reset_to_toplevel() {
|
||||
this->stack_.resize(1);
|
||||
}
|
||||
|
||||
void
|
||||
exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) {
|
||||
constexpr bool c_debug_flag = true;
|
||||
|
|
|
|||
|
|
@ -92,6 +92,13 @@ namespace xo {
|
|||
return retval;
|
||||
} /*include_token*/
|
||||
|
||||
void
|
||||
parser::reset_to_idle_toplevel()
|
||||
{
|
||||
xs_stack_.reset_to_toplevel();
|
||||
env_stack_.reset_to_toplevel();
|
||||
} /*discard_current_state*/
|
||||
|
||||
void
|
||||
parser::print(std::ostream & os) const {
|
||||
os << "<parser"
|
||||
|
|
|
|||
|
|
@ -40,18 +40,15 @@ namespace xo {
|
|||
span_type expr_span = input.prefix(0ul);
|
||||
|
||||
while (!input.empty()) {
|
||||
/* each loop iteration reads one token */
|
||||
|
||||
/* read one token from input */
|
||||
auto sr = this->tokenizer_.scan2(input, eof);
|
||||
const auto & tk = sr.get_token();
|
||||
const span_type & used_span = sr.consumed();
|
||||
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
|
||||
|
||||
log && log(xtag("consumed", used_span));
|
||||
log && log(xtag("input.pre", input));
|
||||
|
||||
input = input.after_prefix(used_span);
|
||||
|
||||
log && log(xtag("expr_span.pre", expr_span));
|
||||
|
||||
input = this->tokenizer_.consume(used_span, input);
|
||||
expr_span += used_span;
|
||||
|
||||
if (tk.is_valid()) {
|
||||
|
|
@ -63,18 +60,33 @@ namespace xo {
|
|||
xtag("expr", expr));
|
||||
|
||||
/* token completes an expression -> victory */
|
||||
return reader_result(expr, expr_span, parser_.stack_size());
|
||||
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
|
||||
} else {
|
||||
/* token did not complete an expression
|
||||
* (e.g. token for '[')
|
||||
*
|
||||
* input span may contain more tokens -> iterate
|
||||
* input span may contain more tokens -> iterate
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
assert(input.empty());
|
||||
if (error.is_error()) {
|
||||
/* tokenizer detected an error */
|
||||
|
||||
/* no more tokens in input */
|
||||
std::cout << "tokenizer error pre-report:" << std::endl;
|
||||
error.report(std::cout);
|
||||
|
||||
return reader_result(nullptr, expr_span, parser_.stack_size(),
|
||||
reader_error(error.src_function(),
|
||||
error.error_description(),
|
||||
error.input_state(),
|
||||
error.error_pos()));
|
||||
} else {
|
||||
/* control should not come here */
|
||||
|
||||
assert(input.empty());
|
||||
}
|
||||
|
||||
/* no more tokens in input */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -99,7 +111,14 @@ namespace xo {
|
|||
|
||||
log && log(xtag("outcome", "noop"));
|
||||
|
||||
return reader_result(nullptr, expr_span, parser_.stack_size());
|
||||
return reader_result(nullptr, expr_span, parser_.stack_size(), reader_error());
|
||||
}
|
||||
|
||||
void
|
||||
reader::reset_to_idle_toplevel()
|
||||
{
|
||||
this->tokenizer_.discard_current_line();
|
||||
this->parser_.reset_to_idle_toplevel();
|
||||
}
|
||||
|
||||
} /*namespace scm*/
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ main() {
|
|||
if (tk.is_valid()) {
|
||||
cout << tk << endl;
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error: " << endl;
|
||||
cout << "tokenizer error: " << endl;
|
||||
error.report(cout);
|
||||
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -104,6 +104,18 @@ namespace xo {
|
|||
/** @defgroup span-general-methods **/
|
||||
///@{
|
||||
|
||||
/** @brief strip prefix until first occurence of '\n', including the newline **/
|
||||
void discard_until_newline() {
|
||||
for (const CharT * p = lo_; p < hi_; ++p) {
|
||||
if (*p == '\n') {
|
||||
lo_ = p + 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
lo_ = hi_;
|
||||
}
|
||||
|
||||
/** Create new span over supplied type,
|
||||
* with identical (possibly misaligned) endpoints.
|
||||
*
|
||||
|
|
@ -142,8 +154,7 @@ namespace xo {
|
|||
|
||||
/** @brief create span with @p prefix of this span removed **/
|
||||
span after_prefix(const span & prefix) const {
|
||||
assert(prefix.lo() == lo_);
|
||||
if (prefix.lo() != lo_) {
|
||||
if (!prefix.is_null() && (prefix.lo() != lo_)) {
|
||||
throw std::runtime_error
|
||||
("after_prefix: expected prefix of this span");
|
||||
}
|
||||
|
|
@ -174,7 +185,7 @@ namespace xo {
|
|||
span & operator+=(const span & x) {
|
||||
if (hi_ == x.lo_) {
|
||||
hi_ = x.hi_;
|
||||
} else {
|
||||
} else if (!x.is_null()) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,12 +31,16 @@ namespace xo {
|
|||
* span_type input = ...;
|
||||
*
|
||||
* while (!input.empty()) {
|
||||
* auto res = tkz.scan(input);
|
||||
* auto [tk, consumed, error] = res.first;
|
||||
* auto [tk, consumed, error] = tkz.scan(input);
|
||||
*
|
||||
* // do something with tk if tk.is_valid()
|
||||
* if (tk.is_valid()) {
|
||||
* // do something with tk
|
||||
* } else if (error.is_error()) {
|
||||
* error.report(cout);
|
||||
* break;
|
||||
* }
|
||||
*
|
||||
* input = tkz.consume(res.second, input);
|
||||
* input = tkz.consume(consumed, input);
|
||||
* }
|
||||
*
|
||||
* if endofinput {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ namespace xo {
|
|||
/** @defgroup tokenizer-error-ctors **/
|
||||
///@{
|
||||
|
||||
/** Default ctor represent a not-an-error sentinel object **/
|
||||
/** Default ctor represents a not-an-error sentinel object **/
|
||||
tokenizer_error() = default;
|
||||
/** Constructor to capture parsing error context
|
||||
* @p tk_start current position on entry to scanner
|
||||
|
|
@ -69,7 +69,7 @@ namespace xo {
|
|||
|
||||
/** true, except for a sentinel error object **/
|
||||
bool is_error() const { return error_description_ != nullptr; }
|
||||
/** true except for object in sentinel state **/
|
||||
/** false except for object in sentinel state **/
|
||||
bool is_not_an_error() const { return error_description_ == nullptr; }
|
||||
|
||||
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue