From 6381fbbe8a493106d16f7a0e6a4041212fdbd787 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Sat, 19 Jul 2025 21:09:57 -0500 Subject: [PATCH] detailed parser error reporting [wip - 1 example] --- include/xo/tokenizer/input_state.hpp | 15 ++++++++++++++- include/xo/tokenizer/tokenizer.hpp | 19 ++++++++++--------- include/xo/tokenizer/tokenizer_error.hpp | 14 +++++++------- utest/tokenizer.test.cpp | 2 +- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/include/xo/tokenizer/input_state.hpp b/include/xo/tokenizer/input_state.hpp index c5d57a39..493a3823 100644 --- a/include/xo/tokenizer/input_state.hpp +++ b/include/xo/tokenizer/input_state.hpp @@ -70,7 +70,12 @@ namespace xo { /** @defgroup input-state-general-methods **/ ///@{ - /** capture prefix of @p input up to first newline **/ + /** Copy of this input state moved back by @p n chars (position clamped at 0, whitespace reset to 0). + * Use to recover input state before a complete but error-triggering token + **/ + input_state rewind(std::size_t n) const; + + /** Capture prefix of @p input up to first newline **/ void capture_current_line(const span_type & input); /** Reset input state for start of next line. @@ -128,6 +133,14 @@ namespace xo { return false; } + template + input_state + input_state::rewind(std::size_t n) const { + return input_state(this->current_line_, + (n <= current_pos_) ? 
current_pos_ - n : 0, + 0 /*whitespace*/); + } + template void input_state::consume(size_t z) { diff --git a/include/xo/tokenizer/tokenizer.hpp b/include/xo/tokenizer/tokenizer.hpp index 74cc43f6..9b017486 100644 --- a/include/xo/tokenizer/tokenizer.hpp +++ b/include/xo/tokenizer/tokenizer.hpp @@ -70,6 +70,16 @@ namespace xo { ///@} + /** @defgroup tokenizer-access-methods tokenizer access methods **/ + ///@{ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wchanges-meaning" + const input_state & input_state() const { return input_state_; } +#pragma GCC diagnostic pop + + ///@} + /** @defgroup tokenizer-general-methods tokenizer methods **/ ///@{ @@ -355,9 +365,6 @@ namespace xo { (error_type(__FUNCTION__ /*src_function*/, "improperly placed sign indicator", input_state_, - //current_line_, - //current_pos_, - //initial_whitespace, (ix - tk_start) )); } @@ -367,9 +374,6 @@ namespace xo { (error_type(__FUNCTION__ /*src_function*/, "duplicate decimal point in numeric literal", input_state_, - //current_line_, - //current_pos_, - //initial_whitespace, (ix - tk_start))); } @@ -380,9 +384,6 @@ namespace xo { (error_type(__FUNCTION__ /*src_function*/, "duplicate exponent marker in numeric literal", input_state_, - //current_line_, - //current_pos_, - //initial_whitespace, (ix - tk_start))); } diff --git a/include/xo/tokenizer/tokenizer_error.hpp b/include/xo/tokenizer/tokenizer_error.hpp index 743e3faf..b2ea6c2e 100644 --- a/include/xo/tokenizer/tokenizer_error.hpp +++ b/include/xo/tokenizer/tokenizer_error.hpp @@ -34,11 +34,11 @@ namespace xo { * @p error_pos error location relative to token start **/ tokenizer_error(const char * src_function, - const char * error_description, + std::string error_description, const input_state_type & input_state, size_t error_pos) : src_function_{src_function}, - error_description_{error_description}, + error_description_{std::move(error_description)}, input_state_{input_state}, error_pos_{error_pos} { @@ -53,7 +53,7 @@ 
namespace xo { ///@{ const char * src_function() const { return src_function_; } - const char * error_description() const { return error_description_; } + const std::string & error_description() const { return error_description_; } #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wchanges-meaning" const input_state_type & input_state() const { return input_state_; } @@ -68,9 +68,9 @@ namespace xo { ///@{ /** true, except for a sentinel error object **/ - bool is_error() const { return error_description_ != nullptr; } + bool is_error() const { return !error_description_.empty(); } /** false except for object in sentinel state **/ - bool is_not_an_error() const { return error_description_ == nullptr; } + bool is_not_an_error() const { return error_description_.empty(); } /** Print representation to stream @p os. Intended for tokenizer diagnostics. * For Schematika errors prefer @ref report @@ -89,7 +89,7 @@ namespace xo { /** source location (in tokenizer) at which error identified **/ char const * src_function_ = nullptr; - /** static error description **/ - char const * error_description_ = nullptr; + /** error description (owned copy); empty for the sentinel not-an-error object **/ + std::string error_description_; /** input state associated with this error. * Sufficient to precisely locate it with context. **/ @@ -117,7 +117,7 @@ namespace xo { tokenizer_error::report(std::ostream & os) const { using namespace std; - if (error_description_) { + if (!error_description_.empty()) { const char * prefix = "input: "; /* input_state.current_pos: position of first character following preceding token. 
* input_state.whitespace: whitespace between current_pos and start of failing token diff --git a/utest/tokenizer.test.cpp b/utest/tokenizer.test.cpp index dbcc74f0..527a685b 100644 --- a/utest/tokenizer.test.cpp +++ b/utest/tokenizer.test.cpp @@ -555,7 +555,7 @@ namespace xo { if (sr.error().src_function()) { REHEARSE(rh, std::string(sr.error().src_function()) == std::string(testcase.expect_error_.src_function())); } - if (sr.error().error_description()) { + if (!sr.error().error_description().empty()) { REHEARSE(rh, std::string(sr.error().error_description()) == std::string(testcase.expect_error_.error_description())); } REHEARSE(rh, sr.error().whitespace() == testcase.expect_error_.whitespace());