From 1f63b9d258d024bce7616d598208cde8cd447ccb Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Sat, 22 Nov 2025 23:06:51 -0500 Subject: [PATCH] xo-tokenizer: streamline error path during tokenization --- include/xo/tokenizer/scan_result.hpp | 28 +++++++-- include/xo/tokenizer/tokenizer.hpp | 94 +++++++++++++--------------- 2 files changed, 65 insertions(+), 57 deletions(-) diff --git a/include/xo/tokenizer/scan_result.hpp b/include/xo/tokenizer/scan_result.hpp index 79846d3c..975edf63 100644 --- a/include/xo/tokenizer/scan_result.hpp +++ b/include/xo/tokenizer/scan_result.hpp @@ -42,8 +42,17 @@ namespace xo { static scan_result make_whitespace(const span_type & prefix_input); static scan_result make_partial(const span_type & prefix_input); - static scan_result make_error(const error_type & error, - input_state_type & input_state_ref); + /** + * @p error_src can be __FUNCTION__ from site where error generated. + * @p error_msg error message + * @p error_pos error position, relative to start of token + * @p input_state_ref input state object; + * copied into scan_result, and leaving input_state_ref.current_line cleared + **/ + static scan_result make_error_consume_current_line(const char * error_src, + std::string error_msg, + size_t error_pos, + input_state_type & input_state_ref); bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); } bool is_token() const { return token_.is_valid(); } @@ -78,14 +87,23 @@ namespace xo { } template - auto scan_result::make_error(const error_type & error, - input_state_type & input_state_ref) -> scan_result + auto + scan_result::make_error_consume_current_line(const char * error_src, + std::string error_msg, + size_t error_pos, + input_state_type & input_state_ref) -> scan_result { /* report+consume entire input line */ + /* copy before altered by .consume_current_line() */ + input_state_type input_state_copy = input_state_ref; + return scan_result(token_type::invalid(), input_state_ref.consume_current_line(), - error); + error_type(error_src, + error_msg, + input_state_copy, + error_pos)); } } /*namespace scm*/ diff --git a/include/xo/tokenizer/tokenizer.hpp b/include/xo/tokenizer/tokenizer.hpp index 2ea695dc..e589b400 100644 --- a/include/xo/tokenizer/tokenizer.hpp +++ b/include/xo/tokenizer/tokenizer.hpp @@ -360,32 +360,28 @@ namespace xo { } else if (exponent_flag && !exponent_digit_flag) { exponent_sign_flag = true; } else { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "improperly placed sign indicator", - input_state_ref, - (ix - tk_start) - ), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "improperly placed sign indicator", + (ix - tk_start), input_state_ref); } } else if (*ix == '.') { if (period_flag) { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "duplicate decimal point in numeric literal", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "duplicate decimal point in numeric literal", + (ix - tk_start), input_state_ref); } period_flag = true; } else if ((*ix == 'e') || (*ix == 'E')) { if (exponent_flag) { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "duplicate exponent marker in numeric literal", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "duplicate exponent marker in numeric literal", + (ix - tk_start), input_state_ref); } @@ -398,11 +394,10 @@ namespace xo { number_flag = true; } } else { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "unexpected character in numeric constant" /*error_description*/, - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "unexpected character in numeric constant" /*error_description*/, + (ix - tk_start), input_state_ref); } } @@ -502,11 +497,10 @@ namespace xo { ++ix; if (ix == token_text.hi()) { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "expecting key following escape character \\", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "expecting key following escape character \\", + (ix - tk_start), input_state_ref); } @@ -532,11 +526,10 @@ namespace xo { tk_text.push_back('"'); break; default: - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "expecting one of n|r|\"|\\ following escape \\", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "expecting one of n|r|\"|\\ following escape \\", + (ix - tk_start), input_state_ref); } break; @@ -550,11 +543,10 @@ namespace xo { } if (!endofstring) { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "missing terminating '\"' to complete literal string", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "missing terminating '\"' to complete literal string", + (ix - tk_start), input_state_ref); } @@ -693,11 +685,10 @@ namespace xo { } if (tk_type == tokentype::tk_invalid) { - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "illegal input character", - input_state_ref, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "illegal input character", + (ix - tk_start), input_state_ref); } @@ -768,7 +759,8 @@ namespace xo { template auto - tokenizer::scan(const span_type & input, bool eof_flag) -> result_type + tokenizer::scan(const span_type & input, + bool eof_flag) -> result_type { scope log(XO_DEBUG(input_state_.debug_flag())); @@ -871,11 +863,10 @@ namespace xo { } else if ((*ix == '\n') || (*ix == '\r')) { log && log ("string literal with naked newline or CR"); - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "must use \\n or \\r to encode newline/cr in string literal", - input_state_, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "must use \\n or \\r to encode newline/cr in string literal", + (ix - tk_start), this->input_state_); } @@ -885,11 +876,10 @@ namespace xo { if (!complete_flag) { log && log("unterminated string literal"); - return result_type::make_error - (error_type(__FUNCTION__ /*src_function*/, - "unterminated string literal", - input_state_, - (ix - tk_start)), + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "unterminated string literal", + (ix - tk_start), this->input_state_); } } else {