reader reports tokenizer errors through normal return

This commit is contained in:
Roland Conybeare 2025-07-19 16:47:59 -05:00
commit 8c36bbce28
14 changed files with 174 additions and 30 deletions

View file

@ -61,10 +61,15 @@ main() {
input = span_type::from_string(input_str); input = span_type::from_string(input_str);
while (!input.empty()) { while (!input.empty()) {
auto [expr, consumed, psz] = rdr.read_expr(input, eof); auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
if (expr) { if (expr) {
cout << expr << endl; cout << expr << endl;
} else if (error.is_error()) {
cout << "parsing error: " << endl;
error.report(cout);
break;
} }
input = input.after_prefix(consumed); input = input.after_prefix(consumed);
@ -72,9 +77,12 @@ main() {
} }
} }
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/); auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
if (expr) { if (expr) {
cout << expr << endl; cout << expr << endl;
} else if (error.is_error()) {
cout << "parsing error: " << endl;
error.report(cout);
} }
} }

View file

@ -93,27 +93,41 @@ main() {
input = span_type::from_string(input_str); input = span_type::from_string(input_str);
while (!input.empty()) { while (!input.empty()) {
auto [expr, consumed, psz] = rdr.read_expr(input, eof); auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
if (expr) { if (expr) {
ppconfig ppc; ppconfig ppc;
ppstate_standalone pps(&cout, 0, &ppc); ppstate_standalone pps(&cout, 0, &ppc);
pps.prettyn(expr); pps.prettyn(expr);
} else if (error.is_error()) {
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
error.report(cout);
break;
} }
input = input.after_prefix(consumed); input = input.after_prefix(consumed);
parser_stack_size = psz; parser_stack_size = psz;
} }
/* here: input.empty() or error encountered */
/* discard stashed remainder of input line
* (for nicely-formatted errors)
*/
rdr.reset_to_idle_toplevel();
} }
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/); auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
if (expr) { if (expr) {
ppconfig ppc; ppconfig ppc;
ppstate_standalone pps(&cout, 0, &ppc); ppstate_standalone pps(&cout, 0, &ppc);
pps.prettyn<rp<Expression>>(rp<Expression>(expr)); pps.prettyn<rp<Expression>>(rp<Expression>(expr));
} else if (error.is_error()) {
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
error.report(cout);
} }
rx.history_save("repl_history.txt"); rx.history_save("repl_history.txt");

View file

@ -40,6 +40,8 @@ namespace xo {
void push_envframe(const rp<LocalEnv> & x); void push_envframe(const rp<LocalEnv> & x);
rp<LocalEnv> pop_envframe(); rp<LocalEnv> pop_envframe();
void reset_to_toplevel() { stack_.resize(1); }
/** relative to top-of-stack. /** relative to top-of-stack.
* 0 -> top (last in), z-1 -> bottom (first in) * 0 -> top (last in), z-1 -> bottom (first in)
**/ **/

View file

@ -29,6 +29,8 @@ namespace xo {
void push_exprstate(std::unique_ptr<exprstate> exs); void push_exprstate(std::unique_ptr<exprstate> exs);
std::unique_ptr<exprstate> pop_exprstate(); std::unique_ptr<exprstate> pop_exprstate();
void reset_to_toplevel();
/** relative to top-of-stack. /** relative to top-of-stack.
* 0 -> top (last in), z-1 -> bottom (first in) * 0 -> top (last in), z-1 -> bottom (first in)
**/ **/

View file

@ -104,7 +104,8 @@ namespace xo {
* $varname(n) : $typename(n)) [-> $typename[ret]] * $varname(n) : $typename(n)) [-> $typename[ret]]
* body-expr * body-expr
* [ end $functionname ] * [ end $functionname ]
* literal-expr = integer-literal * literal-expr = boolean-literal
* | integer-literal
* | fp-literal * | fp-literal
* | string-literal * | string-literal
* | symbol-literal * | symbol-literal
@ -211,6 +212,13 @@ namespace xo {
**/ **/
rp<Expression> include_token(const token_type & tk); rp<Expression> include_token(const token_type & tk);
/** reset to starting parsing state.
* use this after encountering an error, to avoid cascade of
* spurious secondary errors.. particularly important when
* invoked as part of a REPL.
**/
void reset_to_idle_toplevel();
/** print human-readable representation on stream @p os **/ /** print human-readable representation on stream @p os **/
void print(std::ostream & os) const; void print(std::ostream & os) const;

View file

@ -6,6 +6,7 @@
#pragma once #pragma once
#include "parser.hpp" #include "parser.hpp"
#include "reader_error.hpp"
#include "xo/expression/Expression.hpp" #include "xo/expression/Expression.hpp"
#include "xo/expression/pretty_expression.hpp" #include "xo/expression/pretty_expression.hpp"
#include "xo/tokenizer/tokenizer.hpp" #include "xo/tokenizer/tokenizer.hpp"
@ -19,8 +20,8 @@ namespace xo {
using Expression = xo::ast::Expression; using Expression = xo::ast::Expression;
using span_type = span<const char>; using span_type = span<const char>;
reader_result(rp<Expression> expr, span_type rem, std::size_t psz) reader_result(rp<Expression> expr, span_type rem, std::size_t psz, const reader_error & error)
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz} {} : expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz}, error_{error} {}
/** true if reader parsed a complete expression **/ /** true if reader parsed a complete expression **/
bool expr_complete() const { return expr_.get(); } bool expr_complete() const { return expr_.get(); }
@ -37,6 +38,9 @@ namespace xo {
* will be zero whenever @ref expr_ is non-null * will be zero whenever @ref expr_ is non-null
**/ **/
std::size_t parser_stack_size_ = 0; std::size_t parser_stack_size_ = 0;
/** error description, whenever .error_.is_error() is true **/
reader_error error_;
}; };
/** /**
@ -53,7 +57,7 @@ namespace xo {
* *
* for (auto rem = input; !rem.empty();) { * for (auto rem = input; !rem.empty();) {
* // res: (parsed-expr, used) * // res: (parsed-expr, used)
* auto res = rdr.read_expr(rem, eof); * auto res = rdr.read_expr(rem, eof);
* *
* if (res.first) { * if (res.first) {
* // do something with res.first (parsed expr) * // do something with res.first (parsed expr)
@ -112,6 +116,13 @@ namespace xo {
**/ **/
reader_result read_expr(const span_type & input, bool eof); reader_result read_expr(const span_type & input, bool eof);
/** reset to known starting point after encountering an error.
* - remainder of stashed current line.
* Necessary for well-formatted error reporting.
* - current parsing state
**/
void reset_to_idle_toplevel();
private: private:
/** tokenizer: text -> tokens **/ /** tokenizer: text -> tokens **/
tokenizer_type tokenizer_; tokenizer_type tokenizer_;

View file

@ -0,0 +1,53 @@
/* reader_error.hpp
 *
 * Author: Roland Conybeare, Jul 2025
 */

#pragma once

#include "xo/tokenizer/tokenizer_error.hpp"
#include <cstddef>
#include <ostream>

namespace xo {
    namespace scm {
        /** @class reader_error
         *  @brief Error description returned by the reader.
         *
         *  Thin wrapper around a @c tokenizer_error<char>: every accessor
         *  forwards to the wrapped object. A default-constructed instance
         *  is a not-an-error sentinel.
         **/
        class reader_error {
        public:
            using input_state_type = typename tokenizer_error<char>::input_state_type;

        public:
            /** default ctor represents a not-an-error sentinel object **/
            reader_error() = default;

            /** construct to capture parsing error context.
             *  All arguments are forwarded unchanged to the wrapped
             *  tokenizer_error.
             *
             *  @p src_function       name of the function that detected the error
             *  @p error_description  description of the error
             *  @p input_state        input state at the time of the error
             *  @p error_pos          error position
             *                        (NOTE(review): origin/units are defined by
             *                        tokenizer_error -- confirm there)
             **/
            reader_error(const char * src_function,
                         const char * error_description,
                         const input_state_type & input_state,
                         std::size_t error_pos)
                : tk_error_{src_function, error_description, input_state, error_pos}
            {}

            /** the wrapped tokenizer-level error object **/
            const tokenizer_error<char> & tk_error() const { return tk_error_; }

            /** true, except for sentinel not-an-error object **/
            bool is_error() const { return tk_error_.is_error(); }
            /** false, except for object in sentinel state **/
            bool is_not_an_error() const { return tk_error_.is_not_an_error(); }

            /** source function recorded in the wrapped error **/
            const char * src_function() const { return tk_error_.src_function(); }

            /** print error representation to stream @p os. Intended for parser/tokenizer
             *  diagnostics. For Schematika errors prefer @ref report
             **/
            void print(std::ostream & os) const { tk_error_.print(os); }

            /** print human-oriented error report on @p os. **/
            void report(std::ostream & os) const { tk_error_.report(os); }

        private:
            /** for parser-level errors, will still use this for
             *  {src function, error description, input state and error pos}
             **/
            tokenizer_error<char> tk_error_;
        };
    } /*namespace scm*/
} /*namespace xo*/

/* end reader_error.hpp */

View file

@ -20,6 +20,11 @@ namespace xo {
return *(stack_[z-1]); return *(stack_[z-1]);
} }
void
exprstatestack::reset_to_toplevel() {
this->stack_.resize(1);
}
void void
exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) { exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) {
constexpr bool c_debug_flag = true; constexpr bool c_debug_flag = true;

View file

@ -92,6 +92,13 @@ namespace xo {
return retval; return retval;
} /*include_token*/ } /*include_token*/
void
parser::reset_to_idle_toplevel()
{
xs_stack_.reset_to_toplevel();
env_stack_.reset_to_toplevel();
} /*discard_current_state*/
void void
parser::print(std::ostream & os) const { parser::print(std::ostream & os) const {
os << "<parser" os << "<parser"

View file

@ -40,18 +40,15 @@ namespace xo {
span_type expr_span = input.prefix(0ul); span_type expr_span = input.prefix(0ul);
while (!input.empty()) { while (!input.empty()) {
/* each loop iteration reads one token */
/* read one token from input */ /* read one token from input */
auto sr = this->tokenizer_.scan2(input, eof); auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
const auto & tk = sr.get_token();
const span_type & used_span = sr.consumed();
log && log(xtag("consumed", used_span)); log && log(xtag("consumed", used_span));
log && log(xtag("input.pre", input)); log && log(xtag("input.pre", input));
input = input.after_prefix(used_span); input = this->tokenizer_.consume(used_span, input);
log && log(xtag("expr_span.pre", expr_span));
expr_span += used_span; expr_span += used_span;
if (tk.is_valid()) { if (tk.is_valid()) {
@ -63,18 +60,33 @@ namespace xo {
xtag("expr", expr)); xtag("expr", expr));
/* token completes an expression -> victory */ /* token completes an expression -> victory */
return reader_result(expr, expr_span, parser_.stack_size()); return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
} else { } else {
/* token did not complete an expression /* token did not complete an expression
* (e.g. token for '[') * (e.g. token for '[')
* *
* input span may contain more tokens -> iterate * input span may contain more tokens -> iterate
*/ */
} }
} else { } else {
assert(input.empty()); if (error.is_error()) {
/* tokenizer detected an error */
/* no more tokens in input */ std::cout << "tokenizer error pre-report:" << std::endl;
error.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(),
reader_error(error.src_function(),
error.error_description(),
error.input_state(),
error.error_pos()));
} else {
/* control should not come here */
assert(input.empty());
}
/* no more tokens in input */
break; break;
} }
} }
@ -99,7 +111,14 @@ namespace xo {
log && log(xtag("outcome", "noop")); log && log(xtag("outcome", "noop"));
return reader_result(nullptr, expr_span, parser_.stack_size()); return reader_result(nullptr, expr_span, parser_.stack_size(), reader_error());
}
void
reader::reset_to_idle_toplevel()
{
this->tokenizer_.discard_current_line();
this->parser_.reset_to_idle_toplevel();
} }
} /*namespace scm*/ } /*namespace scm*/

View file

@ -41,7 +41,7 @@ main() {
if (tk.is_valid()) { if (tk.is_valid()) {
cout << tk << endl; cout << tk << endl;
} else if (error.is_error()) { } else if (error.is_error()) {
cout << "parsing error: " << endl; cout << "tokenizer error: " << endl;
error.report(cout); error.report(cout);
break; break;

View file

@ -104,6 +104,18 @@ namespace xo {
/** @defgroup span-general-methods **/ /** @defgroup span-general-methods **/
///@{ ///@{
/** @brief strip prefix until first occurence of '\n', including the newline **/
void discard_until_newline() {
for (const CharT * p = lo_; p < hi_; ++p) {
if (*p == '\n') {
lo_ = p + 1;
return;
}
}
lo_ = hi_;
}
/** Create new span over supplied type, /** Create new span over supplied type,
* with identical (possibly misaligned) endpoints. * with identical (possibly misaligned) endpoints.
* *
@ -142,8 +154,7 @@ namespace xo {
/** @brief create span with @p prefix of this span removed **/ /** @brief create span with @p prefix of this span removed **/
span after_prefix(const span & prefix) const { span after_prefix(const span & prefix) const {
assert(prefix.lo() == lo_); if (!prefix.is_null() && (prefix.lo() != lo_)) {
if (prefix.lo() != lo_) {
throw std::runtime_error throw std::runtime_error
("after_prefix: expected prefix of this span"); ("after_prefix: expected prefix of this span");
} }
@ -174,7 +185,7 @@ namespace xo {
span & operator+=(const span & x) { span & operator+=(const span & x) {
if (hi_ == x.lo_) { if (hi_ == x.lo_) {
hi_ = x.hi_; hi_ = x.hi_;
} else { } else if (!x.is_null()) {
assert(false); assert(false);
} }

View file

@ -31,12 +31,16 @@ namespace xo {
* span_type input = ...; * span_type input = ...;
* *
* while (!input.empty()) { * while (!input.empty()) {
* auto res = tkz.scan(input); * auto [tk, consumed, error] = tkz.scan(input);
* auto [tk, consumed, error] = res.first;
* *
* // do something with tk if tk.is_valid() * if (tk.is_valid()) {
* // do something with tk
* } else if (error.is_error()) {
* error.report(cout);
* break;
* }
* *
* input = tkz.consume(res.second, input); * input = tkz.consume(consumed, input);
* } * }
* *
* if endofinput { * if endofinput {

View file

@ -27,7 +27,7 @@ namespace xo {
/** @defgroup tokenizer-error-ctors **/ /** @defgroup tokenizer-error-ctors **/
///@{ ///@{
/** Default ctor represent a not-an-error sentinel object **/ /** Default ctor represents a not-an-error sentinel object **/
tokenizer_error() = default; tokenizer_error() = default;
/** Constructor to capture parsing error context /** Constructor to capture parsing error context
* @p tk_start current position on entry to scanner * @p tk_start current position on entry to scanner
@ -69,7 +69,7 @@ namespace xo {
/** true, except for a sentinel error object **/ /** true, except for a sentinel error object **/
bool is_error() const { return error_description_ != nullptr; } bool is_error() const { return error_description_ != nullptr; }
/** true except for object in sentinel state **/ /** false except for object in sentinel state **/
bool is_not_an_error() const { return error_description_ == nullptr; } bool is_not_an_error() const { return error_description_ == nullptr; }
/** Print representation to stream @p os. Intended for tokenizer diagnostics. /** Print representation to stream @p os. Intended for tokenizer diagnostics.