From d2177898d440a190d99bcf6388a37b2b267d2f70 Mon Sep 17 00:00:00 2001
From: Roland Conybeare
Date: Sun, 22 Jun 2025 16:17:59 -0500
Subject: [PATCH] xo-tokenizer: + missing files

---
NOTE(review): this copy of the patch appears to have been damaged in transit --
the `template` keywords carry no parameter list, token/span/tokenizer_error
have no template arguments, and tokenizer_error::print() writes only "" (the
second hunk announces 75 lines but fewer are present). Presumably text between
angle brackets was stripped; confirm against the original commit before applying.

 .../include/xo/tokenizer/scan_result.hpp      | 73 ++++++++++++++++++
 .../include/xo/tokenizer/tokenizer_error.hpp  | 75 +++++++++++++++++++
 2 files changed, 148 insertions(+)
 create mode 100644 xo-tokenizer/include/xo/tokenizer/scan_result.hpp
 create mode 100644 xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp

diff --git a/xo-tokenizer/include/xo/tokenizer/scan_result.hpp b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
new file mode 100644
index 00000000..9d15b90f
--- /dev/null
+++ b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
@@ -0,0 +1,73 @@
+/* file scan_result.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "token.hpp"
+#include "tokenizer_error.hpp"
+
+namespace xo {
+    namespace scm {
+        /** @brief Represent result of parsing one input token.
+         *
+         *  Possible outcomes fall into several categories
+         *  (with T: @c token_.is_valid(), E: @c error_.is_error())
+         *
+         *  | T     | E     | description       |
+         *  |-------+-------+-------------------|
+         *  | false | false | end of input      |
+         *  | true  | false | parsed token in T |
+         *  | false | true  | parse error in E  |
+         *  make_whitespace() / make_partial() pair token_type::invalid() with no error (presumably the first row too)
+         **/
+        template
+        class scan_result {
+        public:
+            using token_type = token;
+            using span_type = span;
+            using error_type = tokenizer_error;
+
+        public:
+            scan_result(const token_type & token,
+                        const span_type & consumed,
+                        const error_type & error = error_type())
+                : token_{token}, consumed_{consumed}, error_{error} {}
+
+            static scan_result make_whitespace(const span_type & whitespace_input);
+            static scan_result make_partial(const span_type & prefix_input);
+
+            bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
+            bool is_token() const { return token_.is_valid(); }
+            bool is_error() const { return error_.is_error(); }
+
+            const token_type & get_token() const { return token_; }
+            const span_type & consumed() const { return consumed_; }
+            const error_type & error() const { return error_; }
+
+        public:
+            /** successfully parsed token, whenever tk_type != tokentype::tk_invalid **/
+            token_type token_;
+            /** input span represented by .token_, on success. Otherwise not defined **/
+            span_type consumed_;
+            /** error description, whenever .error_.is_error() is true **/
+            error_type error_;
+        };
+
+        template
+        auto scan_result::make_whitespace(const span_type& whitespace_input) -> scan_result
+        {
+            return scan_result(token_type::invalid(), whitespace_input /*consumed*/);
+        }
+
+        template
+        auto scan_result::make_partial(const span_type& prefix_input) -> scan_result
+        {
+            return scan_result(token_type::invalid(), prefix_input /*consumed*/);
+        }
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end scan_result.hpp */
diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
new file mode 100644
index 00000000..3f5b5944
--- /dev/null
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
@@ -0,0 +1,75 @@
+/* file tokenizer_error.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "tokentype.hpp"
+#include "span.hpp"
+
+namespace xo {
+    namespace scm {
+        /** represent a lexing error, with context **/
+        template
+        class tokenizer_error {
+        public:
+            using span_type = span;
+
+        public:
+            /** @brief default ctor represents a not-an-error object (null @c error_description_) **/
+            tokenizer_error() = default;
+            tokenizer_error(char const * src_function,
+                            char const* error_description,
+                            span_type input_line, size_t error_pos)
+                : src_function_{src_function}, /* initializers listed in member declaration order */
+                  error_description_{error_description},
+                  error_pos_{error_pos},
+                  input_line_{input_line} {}
+
+            char const* src_function() const { return src_function_; }
+            char const* error_description() const { return error_description_; }
+            size_t error_pos() const { return error_pos_; }
+            const span_type& input_line() const { return input_line_; }
+
+            bool is_not_an_error() const { return error_description_ == nullptr; }
+            bool is_error() const { return error_description_ != nullptr; }
+
+            void print(std::ostream & os) const;
+
+        private:
+            /** source location (in tokenizer) at which error identified **/
+            char const * src_function_ = nullptr;
+            /** static error description; null when this object is not an error **/
+            char const * error_description_ = nullptr;
+            /** position (relative to input_line_.lo) of error **/
+            size_t error_pos_ = 0;
+            /** complete input line (to the extent available)
+             *  containing error
+             **/
+            span_type input_line_ = span_type::make_null();
+        }; /*tokenizer_error*/
+
+        template
+        void
+        tokenizer_error::print(std::ostream & os) const {
+            os << "";
+        }
+
+        template
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const tokenizer_error & tkerr)
+        {
+            tkerr.print(os);
+            return os;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end tokenizer_error.hpp */