From be157859cc89077716042912a19e6427c135d169 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Wed, 25 Jun 2025 21:45:24 -0500 Subject: [PATCH] xo-tokenizer: doc fixes --- xo-tokenizer/docs/CMakeLists.txt | 1 + xo-tokenizer/docs/implementation.rst | 8 ++-- xo-tokenizer/docs/index.rst | 3 ++ xo-tokenizer/docs/span-class.rst | 11 ++++-- xo-tokenizer/docs/token-class.rst | 8 ++-- xo-tokenizer/docs/tokenizer-class.rst | 39 ++++++++++++++++++- xo-tokenizer/docs/tokenizer-error-class.rst | 8 ++-- xo-tokenizer/docs/tokentype-enum.rst | 6 ++- .../include/xo/tokenizer/input_state.hpp | 36 +++++++++++++++++ xo-tokenizer/include/xo/tokenizer/token.hpp | 2 +- .../include/xo/tokenizer/tokenizer_error.hpp | 2 +- 11 files changed, 105 insertions(+), 19 deletions(-) diff --git a/xo-tokenizer/docs/CMakeLists.txt b/xo-tokenizer/docs/CMakeLists.txt index c930092b..898ea65d 100644 --- a/xo-tokenizer/docs/CMakeLists.txt +++ b/xo-tokenizer/docs/CMakeLists.txt @@ -5,4 +5,5 @@ xo_docdir_doxygen_config() xo_docdir_sphinx_config( index.rst install.rst examples.rst implementation.rst token-class.rst tokenizer-error-class.rst span-class.rst tokentype-enum.rst + input-state.rst ) diff --git a/xo-tokenizer/docs/implementation.rst b/xo-tokenizer/docs/implementation.rst index 62a01100..3f617e4d 100644 --- a/xo-tokenizer/docs/implementation.rst +++ b/xo-tokenizer/docs/implementation.rst @@ -29,8 +29,10 @@ Abstraction tower for *xo-tokenizer* components: | tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - | token | tokenizer_error | | +-----------------+-----------------------+ | - | tokentype | span | | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | +-----------------+-----------------------+----------+ diff --git a/xo-tokenizer/docs/index.rst b/xo-tokenizer/docs/index.rst index 
bcbf1f6a..b6353f4f 100644 --- a/xo-tokenizer/docs/index.rst +++ b/xo-tokenizer/docs/index.rst @@ -16,7 +16,10 @@ may appear in variable names: ``one-of-those-days`` is an ordinary symbol. install examples implementation + tokenizer-class + scan-result-class token-class tokenizer-error-class + input-state-class span-class tokentype-enum diff --git a/xo-tokenizer/docs/span-class.rst b/xo-tokenizer/docs/span-class.rst index a935d309..b641ca1f 100644 --- a/xo-tokenizer/docs/span-class.rst +++ b/xo-tokenizer/docs/span-class.rst @@ -16,10 +16,12 @@ Context | tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - | token | tokenizer_error | | +-----------------+-----------------------+ | - | tokentype |cBLU span | | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype |cBLU span | | +-----------------+-----------------------+----------+ .. code-block:: cpp @@ -34,11 +36,12 @@ Context object span1<> span1 : lo = p - span2 : hi = p+25 + span1 : hi = p+25 object dest<> dest : def fact(n : i64) { ... } + span1 o-- dest - Identify a sequence of characters stored in contiguous memory. diff --git a/xo-tokenizer/docs/token-class.rst b/xo-tokenizer/docs/token-class.rst index e0f58299..8d19a852 100644 --- a/xo-tokenizer/docs/token-class.rst +++ b/xo-tokenizer/docs/token-class.rst @@ -16,10 +16,12 @@ Context | tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - |cBLU token | tokenizer_error | | +-----------------+-----------------------+ | - | tokentype | span | | + |cBLU | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | +-----------------+-----------------------+----------+ .. 
code-block:: cpp diff --git a/xo-tokenizer/docs/tokenizer-class.rst b/xo-tokenizer/docs/tokenizer-class.rst index dac20549..5b29f4b3 100644 --- a/xo-tokenizer/docs/tokenizer-class.rst +++ b/xo-tokenizer/docs/tokenizer-class.rst @@ -16,8 +16,10 @@ Context |cBLU tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - | token | tokenizer_error | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | +-----------------+-----------------------+ | | tokentype | span | | +-----------------+-----------------------+----------+ @@ -25,3 +27,36 @@ Context .. code-block:: cpp #include + +.. uml:: + :scale: 99% + :align: center + + allowmixing + + object tkz1<> + tkz1 : input_state + + object ins1<> + + tkz1 o-- ins1 + +- Assemble a stream of lexical tokens from a text stream. + +- Lexical errors reported via scan_result instance; + errors reported with detailed context + +Class +----- + +.. doxygenclass:: xo::scm::tokenizer + +Instance Variables +------------------ + +.. doxygengroup:: tokenizer-instance-vars + +Constructors +------------ + +.. 
doxygengroup:: tokenizer-ctors diff --git a/xo-tokenizer/docs/tokenizer-error-class.rst b/xo-tokenizer/docs/tokenizer-error-class.rst index 0ff72520..848f2e98 100644 --- a/xo-tokenizer/docs/tokenizer-error-class.rst +++ b/xo-tokenizer/docs/tokenizer-error-class.rst @@ -16,8 +16,10 @@ Context | tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - | token |cBLU tokenizer_error | | + +-----------------+-----------------------+ | + | |cBLU tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | +-----------------+-----------------------+ | | tokentype | span | | +-----------------+-----------------------+----------+ @@ -34,7 +36,7 @@ Class Instance Variables ------------------ -.. doxygengroup:: tokenizer-error-instance-vars +.. doxygengroup:: tokenizer-error-vars Constructors ------------ diff --git a/xo-tokenizer/docs/tokentype-enum.rst b/xo-tokenizer/docs/tokentype-enum.rst index 7342c700..0f371dda 100644 --- a/xo-tokenizer/docs/tokentype-enum.rst +++ b/xo-tokenizer/docs/tokentype-enum.rst @@ -16,8 +16,10 @@ Context | tokenizer | | +-----------------------------------------+ | | scan_result | | - +-----------------+-----------------------+ buffer | - | token | tokenizer_error | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | +-----------------+-----------------------+ | |cBLU tokentype | span | | +-----------------+-----------------------+----------+ diff --git a/xo-tokenizer/include/xo/tokenizer/input_state.hpp b/xo-tokenizer/include/xo/tokenizer/input_state.hpp index ca5ace33..c17f085b 100644 --- a/xo-tokenizer/include/xo/tokenizer/input_state.hpp +++ b/xo-tokenizer/include/xo/tokenizer/input_state.hpp @@ -16,9 +16,18 @@ namespace xo { template class input_state { public: + /** @defgroup input-state-type-traits input-state type traits **/ + ///@{ + + /** type 
representing a contiguous span of tokenizer input characters **/ using span_type = span; + ///@} + public: + /** @defgroup input-state-ctors input_state constructors **/ + ///@{ + input_state() = default; explicit input_state(bool debug_flag) : debug_flag_{debug_flag} {} /** Create instance with supplied @p current_line, @p current_pos, @p whitespace. @@ -27,6 +36,11 @@ namespace xo { explicit input_state(const span& current_line, size_t current_pos, size_t whitespace) : current_line_{current_line}, current_pos_{current_pos}, whitespace_{whitespace} {} + ///@} + + /** @defgroup input-state-static-methods input_state static methods **/ + ///@{ + /** recognize the newline character '\n' **/ static bool is_newline(CharT ch); /** identifies whitespace chars. @@ -38,6 +52,11 @@ namespace xo { **/ static bool is_whitespace(CharT ch); + ///@} + + /** @defgroup input-state-access-methods **/ + ///@{ + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wchanges-meaning" const span_type & current_line() const { return current_line_; } @@ -46,6 +65,11 @@ namespace xo { size_t whitespace() const { return whitespace_; } bool debug_flag() const { return debug_flag_; } + ///@} + + /** @defgroup input-state-general-methods **/ + ///@{ + /** capture prefix of @p input up to first newline **/ void capture_current_line(const span_type & input); @@ -55,11 +79,21 @@ namespace xo { **/ void discard_current_line(); + /** Add @p z to current position **/ void consume(size_t z) { current_pos_ += z; } + /** Skip prefix of input comprising whitespace. + * Return pointer to first non-whitespace character in @p input, + * or @c input.hi if input contains only whitespace + **/ const CharT * skip_leading_whitespace(const span_type & input); + ///@} + private: + /** @defgroup input-state-instance-vars **/ + ///@{ + /** remember current input line. 
Used only to report errors **/ span current_line_ = span(); /** current input position within @ref current_line_ **/ @@ -71,6 +105,8 @@ namespace xo { /** true to log input activity */ bool debug_flag_ = false; + + ///@} }; template diff --git a/xo-tokenizer/include/xo/tokenizer/token.hpp b/xo-tokenizer/include/xo/tokenizer/token.hpp index 68666da8..f12d95da 100644 --- a/xo-tokenizer/include/xo/tokenizer/token.hpp +++ b/xo-tokenizer/include/xo/tokenizer/token.hpp @@ -188,7 +188,7 @@ namespace xo { std::string text_; ///@} - }; /*token*/ + }; template std::int64_t diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp index 5b83e27f..1f807741 100644 --- a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp +++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp @@ -83,7 +83,7 @@ namespace xo { ///@} private: - /** @defgroup tokenizer-error-instance-vars **/ + /** @defgroup tokenizer-error-vars **/ ///@{ /** source location (in tokenizer) at which error identified **/