detailed parser error reporting [wip - 1 example]

This commit is contained in:
Roland Conybeare 2025-07-19 21:09:57 -05:00
commit 65a83cd77c
19 changed files with 256 additions and 84 deletions

View file

@ -70,7 +70,12 @@ namespace xo {
/** @defgroup input-state-general-methods **/
///@{
/** capture prefix of @p input up to first newline **/
/** Return the input state obtained by backing up @p n chars on the current line.
* The position stops at 0 when @p n exceeds it, and the whitespace count is reset to 0.
* Use to recover input state before a complete but error-triggering token
**/
input_state rewind(std::size_t n) const;
/** Capture prefix of @p input up to first newline **/
void capture_current_line(const span_type & input);
/** Reset input state for start of next line.
@ -128,6 +133,14 @@ namespace xo {
return false;
}
/** Build a copy of this input state positioned @p n chars earlier on the
 *  same line. The position is clamped at 0, and the whitespace count of
 *  the result is 0.
 **/
template <typename CharT>
input_state<CharT>
input_state<CharT>::rewind(std::size_t n) const {
    /* backing up further than current_pos_ lands on position 0 */
    const auto rewound_pos = (current_pos_ < n) ? 0 : current_pos_ - n;

    return input_state<CharT>(this->current_line_,
                              rewound_pos,
                              0 /*whitespace*/);
}
template <typename CharT>
void
input_state<CharT>::consume(size_t z) {

View file

@ -70,6 +70,16 @@ namespace xo {
///@}
/** @defgroup tokenizer-access-methods tokenizer access methods **/
///@{
/** Read-only access to the tokenizer's input_state_ member.
 *  The accessor has the same name as the input_state<CharT> template it
 *  returns; the pragmas below silence GCC's -Wchanges-meaning diagnostic
 *  for this one declaration and then restore the previous warning state.
 **/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchanges-meaning"
const input_state<CharT> & input_state() const { return input_state_; }
#pragma GCC diagnostic pop
///@}
/** @defgroup tokenizer-general-methods tokenizer methods **/
///@{
@ -355,9 +365,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"improperly placed sign indicator",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)
));
}
@ -367,9 +374,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"duplicate decimal point in numeric literal",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)));
}
@ -380,9 +384,6 @@ namespace xo {
(error_type(__FUNCTION__ /*src_function*/,
"duplicate exponent marker in numeric literal",
input_state_,
//current_line_,
//current_pos_,
//initial_whitespace,
(ix - tk_start)));
}

View file

@ -34,11 +34,11 @@ namespace xo {
* @p error_pos error location relative to token start
**/
tokenizer_error(const char * src_function,
const char * error_description,
std::string error_description,
const input_state_type & input_state,
size_t error_pos)
: src_function_{src_function},
error_description_{error_description},
error_description_{std::move(error_description)},
input_state_{input_state},
error_pos_{error_pos}
{
@ -53,7 +53,7 @@ namespace xo {
///@{
const char * src_function() const { return src_function_; }
const char * error_description() const { return error_description_; }
const std::string & error_description() const { return error_description_; }
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchanges-meaning"
const input_state_type & input_state() const { return input_state_; }
@ -68,9 +68,9 @@ namespace xo {
///@{
/** true, except for a sentinel error object **/
bool is_error() const { return error_description_ != nullptr; }
bool is_error() const { return !error_description_.empty(); }
/** false except for object in sentinel state **/
bool is_not_an_error() const { return error_description_ == nullptr; }
bool is_not_an_error() const { return error_description_.empty(); }
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
* For Schematika errors prefer @ref report
@ -89,7 +89,7 @@ namespace xo {
/** source location (in tokenizer) at which error identified **/
char const * src_function_ = nullptr;
/** static error description **/
char const * error_description_ = nullptr;
std::string error_description_;
/** input state associated with this error.
* Sufficient to precisely locate it with context.
**/
@ -117,7 +117,7 @@ namespace xo {
tokenizer_error<CharT>::report(std::ostream & os) const {
using namespace std;
if (error_description_) {
if (!error_description_.empty()) {
const char * prefix = "input: ";
/* input_state.current_pos: position of first character following preceding token.
* input_state.whitespace: whitespace between current_pos and start of failing token

View file

@ -555,7 +555,7 @@ namespace xo {
if (sr.error().src_function()) {
REHEARSE(rh, std::string(sr.error().src_function()) == std::string(testcase.expect_error_.src_function()));
}
if (sr.error().error_description()) {
if (!sr.error().error_description().empty()) {
REHEARSE(rh, std::string(sr.error().error_description()) == std::string(testcase.expect_error_.error_description()));
}
REHEARSE(rh, sr.error().whitespace() == testcase.expect_error_.whitespace());