xo-tokenizer: doc fixes

This commit is contained in:
Roland Conybeare 2025-06-25 21:45:24 -05:00
commit be157859cc
11 changed files with 105 additions and 19 deletions

View file

@ -5,4 +5,5 @@ xo_docdir_doxygen_config()
xo_docdir_sphinx_config(
index.rst install.rst examples.rst implementation.rst
token-class.rst tokenizer-error-class.rst span-class.rst tokentype-enum.rst
input-state.rst
)

View file

@ -29,8 +29,10 @@ Abstraction tower for *xo-tokenizer* components:
| tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
| token | tokenizer_error | |
+-----------------+-----------------------+ |
| tokentype | span | |
| | tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
| tokentype | span | |
+-----------------+-----------------------+----------+

View file

@ -16,7 +16,10 @@ may appear in variable names: ``one-of-those-days`` is an ordinary symbol.
install
examples
implementation
tokenizer-class
scan-result-class
token-class
tokenizer-error-class
input-state-class
span-class
tokentype-enum

View file

@ -16,10 +16,12 @@ Context
| tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
| token | tokenizer_error | |
+-----------------+-----------------------+ |
| tokentype |cBLU span | |
| | tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
| tokentype |cBLU span | |
+-----------------+-----------------------+----------+
.. code-block:: cpp
@ -34,11 +36,12 @@ Context
object span1<<span>>
span1 : lo = p
span2 : hi = p+25
span1 : hi = p+25
object dest<<memory>>
dest : def fact(n : i64) { ... }
span1 o-- dest
- Identify a sequence of characters stored in contiguous memory.

View file

@ -16,10 +16,12 @@ Context
| tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
|cBLU token | tokenizer_error | |
+-----------------+-----------------------+ |
| tokentype | span | |
|cBLU | tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
| tokentype | span | |
+-----------------+-----------------------+----------+
.. code-block:: cpp

View file

@ -16,8 +16,10 @@ Context
|cBLU tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
| token | tokenizer_error | |
+-----------------+-----------------------+ |
| | tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
| tokentype | span | |
+-----------------+-----------------------+----------+
@ -25,3 +27,36 @@ Context
.. code-block:: cpp
#include <xo/tokenizer/tokenizer.hpp>
.. uml::
:scale: 99%
:align: center
allowmixing
object tkz1<<tokenizer>>
tkz1 : input_state
object ins1<<input_state>>
tkz1 o-- ins1
- Assemble a stream of lexical tokens from a text stream.
- Lexical errors are reported via a scan_result instance, with detailed context.
Class
-----
.. doxygenclass:: xo::scm::tokenizer
Instance Variables
------------------
.. doxygengroup:: tokenizer-instance-vars
Constructors
------------
.. doxygengroup:: tokenizer-ctors

View file

@ -16,8 +16,10 @@ Context
| tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
| token |cBLU tokenizer_error | |
+-----------------+-----------------------+ |
| |cBLU tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
| tokentype | span | |
+-----------------+-----------------------+----------+
@ -34,7 +36,7 @@ Class
Instance Variables
------------------
.. doxygengroup:: tokenizer-error-instance-vars
.. doxygengroup:: tokenizer-error-vars
Constructors
------------

View file

@ -16,8 +16,10 @@ Context
| tokenizer | |
+-----------------------------------------+ |
| scan_result | |
+-----------------+-----------------------+ buffer |
| token | tokenizer_error | |
+-----------------+-----------------------+ |
| | tokenizer_error | buffer |
| token +-----------------------+ |
| | input_state | |
+-----------------+-----------------------+ |
|cBLU tokentype | span | |
+-----------------+-----------------------+----------+

View file

@ -16,9 +16,18 @@ namespace xo {
template <typename CharT>
class input_state {
public:
/** @defgroup input-state-type-traits input-state type traits **/
///@{
/** type representing a contiguous span of tokenizer input characters **/
using span_type = span<const CharT>;
///@}
public:
/** @defgroup input-state-ctors input_state constructors **/
///@{
input_state() = default;
explicit input_state(bool debug_flag) : debug_flag_{debug_flag} {}
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
@ -27,6 +36,11 @@ namespace xo {
explicit input_state(const span<const CharT>& current_line, size_t current_pos, size_t whitespace)
: current_line_{current_line}, current_pos_{current_pos}, whitespace_{whitespace} {}
///@}
/** @defgroup input-state-static-methods input_state static methods **/
///@{
/** recognize the newline character '\n' **/
static bool is_newline(CharT ch);
/** identifies whitespace chars.
@ -38,6 +52,11 @@ namespace xo {
**/
static bool is_whitespace(CharT ch);
///@}
/** @defgroup input-state-access-methods **/
///@{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wchanges-meaning"
const span_type & current_line() const { return current_line_; }
@ -46,6 +65,11 @@ namespace xo {
size_t whitespace() const { return whitespace_; }
bool debug_flag() const { return debug_flag_; }
///@}
/** @defgroup input-state-general-methods **/
///@{
/** capture prefix of @p input up to first newline **/
void capture_current_line(const span_type & input);
@ -55,11 +79,21 @@ namespace xo {
**/
void discard_current_line();
/** Add @p z to current position **/
void consume(size_t z) { current_pos_ += z; }
/** Skip prefix of input comprising whitespace.
* Return pointer to first non-whitespace character in @p input,
* or @c input.hi if input contains only whitespace
**/
const CharT * skip_leading_whitespace(const span_type & input);
///@}
private:
/** @defgroup input-state-instance-vars **/
///@{
/** remember current input line. Used only to report errors **/
span<const CharT> current_line_ = span<const CharT>();
/** current input position within @ref current_line_ **/
@ -71,6 +105,8 @@ namespace xo {
/** true to log input activity */
bool debug_flag_ = false;
///@}
};
template <typename CharT>

View file

@ -188,7 +188,7 @@ namespace xo {
std::string text_;
///@}
}; /*token*/
};
template <typename CharT>
std::int64_t

View file

@ -83,7 +83,7 @@ namespace xo {
///@}
private:
/** @defgroup tokenizer-error-instance-vars **/
/** @defgroup tokenizer-error-vars **/
///@{
/** source location (in tokenizer) at which error identified **/