reader reports tokenizer errors through normal return

This commit is contained in:
Roland Conybeare 2025-07-19 16:47:59 -05:00
commit 781adeb0d3
10 changed files with 149 additions and 20 deletions

View file

@ -61,10 +61,15 @@ main() {
input = span_type::from_string(input_str);
while (!input.empty()) {
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
if (expr) {
cout << expr << endl;
} else if (error.is_error()) {
cout << "parsing error: " << endl;
error.report(cout);
break;
}
input = input.after_prefix(consumed);
@ -72,9 +77,12 @@ main() {
}
}
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
if (expr) {
cout << expr << endl;
} else if (error.is_error()) {
cout << "parsing error: " << endl;
error.report(cout);
}
}

View file

@ -93,27 +93,41 @@ main() {
input = span_type::from_string(input_str);
while (!input.empty()) {
auto [expr, consumed, psz] = rdr.read_expr(input, eof);
auto [expr, consumed, psz, error] = rdr.read_expr(input, eof);
if (expr) {
ppconfig ppc;
ppstate_standalone pps(&cout, 0, &ppc);
pps.prettyn(expr);
} else if (error.is_error()) {
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
error.report(cout);
break;
}
input = input.after_prefix(consumed);
parser_stack_size = psz;
}
/* here: input.empty() or error encountered */
/* discard stashed remainder of input line
* (for nicely-formatted errors)
*/
rdr.reset_to_idle_toplevel();
}
auto [expr, _1, _2] = rdr.read_expr(input, true /*eof*/);
auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/);
if (expr) {
ppconfig ppc;
ppstate_standalone pps(&cout, 0, &ppc);
pps.prettyn<rp<Expression>>(rp<Expression>(expr));
} else if (error.is_error()) {
cout << "parsing error (detected in " << error.src_function() << "): " << endl;
error.report(cout);
}
rx.history_save("repl_history.txt");

View file

@ -40,6 +40,8 @@ namespace xo {
void push_envframe(const rp<LocalEnv> & x);
rp<LocalEnv> pop_envframe();
/** shrink the frame stack to a single entry (resizes stack_ to 1) **/
void reset_to_toplevel() {
    stack_.resize(1);
}
/** relative to top-of-stack.
* 0 -> top (last in), z-1 -> bottom (first in)
**/

View file

@ -29,6 +29,8 @@ namespace xo {
void push_exprstate(std::unique_ptr<exprstate> exs);
std::unique_ptr<exprstate> pop_exprstate();
void reset_to_toplevel();
/** relative to top-of-stack.
* 0 -> top (last in), z-1 -> bottom (first in)
**/

View file

@ -104,7 +104,8 @@ namespace xo {
* $varname(n) : $typename(n)) [-> $typename[ret]]
* body-expr
* [ end $functionname ]
* literal-expr = integer-literal
* literal-expr = boolean-literal
* | integer-literal
* | fp-literal
* | string-literal
* | symbol-literal
@ -211,6 +212,13 @@ namespace xo {
**/
rp<Expression> include_token(const token_type & tk);
/** reset to starting parsing state.
* use this after encountering an error, to avoid cascade of
* spurious secondary errors.. particularly important when
* invoked as part of a REPL.
**/
void reset_to_idle_toplevel();
/** print human-readable representation on stream @p os **/
void print(std::ostream & os) const;

View file

@ -6,6 +6,7 @@
#pragma once
#include "parser.hpp"
#include "reader_error.hpp"
#include "xo/expression/Expression.hpp"
#include "xo/expression/pretty_expression.hpp"
#include "xo/tokenizer/tokenizer.hpp"
@ -19,8 +20,8 @@ namespace xo {
using Expression = xo::ast::Expression;
using span_type = span<const char>;
reader_result(rp<Expression> expr, span_type rem, std::size_t psz)
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz} {}
reader_result(rp<Expression> expr, span_type rem, std::size_t psz, const reader_error & error)
: expr_{std::move(expr)}, rem_{rem}, parser_stack_size_{psz}, error_{error} {}
/** true if reader parsed a complete expression **/
bool expr_complete() const { return expr_.get(); }
@ -37,6 +38,9 @@ namespace xo {
* will be zero whenever @ref expr_ is non-null
**/
std::size_t parser_stack_size_ = 0;
/** error description, whenever .error_.is_error() is true **/
reader_error error_;
};
/**
@ -53,7 +57,7 @@ namespace xo {
*
* for (auto rem = input; !rem.empty();) {
* // res: (parsed-expr, used)
* auto res = rdr.read_expr(rem, eof);
* auto res = rdr.read_expr(rem, eof);
*
* if (res.first) {
* // do something with res.first (parsed expr)
@ -112,6 +116,13 @@ namespace xo {
**/
reader_result read_expr(const span_type & input, bool eof);
/** reset to known starting point after encountering an error. Discards:
* - remainder of stashed current line.
* Necessary for well-formatted error reporting.
* - current parsing state
**/
void reset_to_idle_toplevel();
private:
/** tokenizer: text -> tokens **/
tokenizer_type tokenizer_;

View file

@ -0,0 +1,53 @@
/* reader_error.hpp
 *
 * Author: Roland Conybeare, Jul 2025
 */

#pragma once

#include "xo/tokenizer/tokenizer_error.hpp"
#include <cstddef>
#include <ostream>

namespace xo {
    namespace scm {
        /** @class reader_error
         *  @brief error value returned by the reader alongside its normal result.
         *
         *  Thin wrapper around @c tokenizer_error<char>; every accessor
         *  forwards to the wrapped object. A default-constructed instance
         *  is the "not an error" sentinel.
         **/
        class reader_error {
        public:
            using input_state_type = typename tokenizer_error<char>::input_state_type;

        public:
            /** default ctor represents a not-an-error sentinel object **/
            reader_error() = default;

            /** construct to capture parsing error context.
             *  All arguments are forwarded unchanged to the wrapped
             *  @c tokenizer_error<char>.
             *
             *  @param src_function       name of the function that detected the error
             *  @param error_description  text describing the error
             *  @param input_state        tokenizer input state associated with the error
             *  @param error_pos          error position, as understood by
             *                            @c tokenizer_error (NOTE(review): presumably
             *                            relative to @p input_state -- confirm)
             **/
            reader_error(const char * src_function,
                         const char * error_description,
                         const input_state_type & input_state,
                         std::size_t error_pos)
                : tk_error_{src_function, error_description, input_state, error_pos}
            {}

            /** the wrapped tokenizer-level error **/
            const tokenizer_error<char> & tk_error() const { return tk_error_; }

            /** true, except for sentinel not-an-error object **/
            bool is_error() const { return tk_error_.is_error(); }
            /** false, except for object in sentinel state **/
            bool is_not_an_error() const { return tk_error_.is_not_an_error(); }

            /** source function recorded for this error (forwarded from @ref tk_error_) **/
            const char * src_function() const { return tk_error_.src_function(); }

            /** print error representation to stream @p os. Intended for parser/tokenizer
             *  diagnostics. For Schematika errors prefer @ref report
             **/
            void print(std::ostream & os) const { tk_error_.print(os); }

            /** print human-oriented error report on @p os. **/
            void report(std::ostream & os) const { tk_error_.report(os); }

        private:
            /** for parser-level errors, will still use this for
             *  {src function, error description, input state and error pos}
             **/
            tokenizer_error<char> tk_error_;
        };
    } /*namespace scm*/
} /*namespace xo*/

/* end reader_error.hpp */

View file

@ -20,6 +20,11 @@ namespace xo {
return *(stack_[z-1]);
}
void
exprstatestack::reset_to_toplevel() {
this->stack_.resize(1);
}
void
exprstatestack::push_exprstate(std::unique_ptr<exprstate> exs) {
constexpr bool c_debug_flag = true;

View file

@ -92,6 +92,13 @@ namespace xo {
return retval;
} /*include_token*/
void
parser::reset_to_idle_toplevel()
{
xs_stack_.reset_to_toplevel();
env_stack_.reset_to_toplevel();
} /*discard_current_state*/
void
parser::print(std::ostream & os) const {
os << "<parser"

View file

@ -40,18 +40,15 @@ namespace xo {
span_type expr_span = input.prefix(0ul);
while (!input.empty()) {
/* each loop iterations reads one token */
/* read one token from input */
auto sr = this->tokenizer_.scan2(input, eof);
const auto & tk = sr.get_token();
const span_type & used_span = sr.consumed();
auto [tk, used_span, error] = this->tokenizer_.scan2(input, eof);
log && log(xtag("consumed", used_span));
log && log(xtag("input.pre", input));
input = input.after_prefix(used_span);
log && log(xtag("expr_span.pre", expr_span));
input = this->tokenizer_.consume(used_span, input);
expr_span += used_span;
if (tk.is_valid()) {
@ -63,18 +60,33 @@ namespace xo {
xtag("expr", expr));
/* token completes an expression -> victory */
return reader_result(expr, expr_span, parser_.stack_size());
return reader_result(expr, expr_span, parser_.stack_size(), reader_error());
} else {
/* token did not complete an expression
* (e.g. token for '[')
*
* input span may contain more tokens -> iterate
* input span may contain more tokens -> iterate
*/
}
} else {
assert(input.empty());
if (error.is_error()) {
/* tokenizer detected an error */
/* no more tokens in input */
std::cout << "tokenizer error pre-report:" << std::endl;
error.report(std::cout);
return reader_result(nullptr, expr_span, parser_.stack_size(),
reader_error(error.src_function(),
error.error_description(),
error.input_state(),
error.error_pos()));
} else {
/* control should not come here */
assert(input.empty());
}
/* no more tokens in input */
break;
}
}
@ -99,7 +111,14 @@ namespace xo {
log && log(xtag("outcome", "noop"));
return reader_result(nullptr, expr_span, parser_.stack_size());
return reader_result(nullptr, expr_span, parser_.stack_size(), reader_error());
}
void
reader::reset_to_idle_toplevel()
{
this->tokenizer_.discard_current_line();
this->parser_.reset_to_idle_toplevel();
}
} /*namespace scm*/