xo-tokenizer: docs + error-handling improvement

drop exceptions for return-value error object
2025-06-23 23:08:12 -05:00 · 2025-06-23 23:08:12 -05:00 · 6fbfd065a2
commit 6fbfd065a2
parent f9961a1e37
30 changed files with 1086 additions and 162 deletions
--- a/xo-tokenizer/docs/CMakeLists.txt
+++ b/xo-tokenizer/docs/CMakeLists.txt
@ -1,5 +1,8 @@
-# xo-tokenizer/CMakeLists.txt
+# xo-tokenizer/docs/CMakeLists.txt

 xo_doxygen_collect_deps()
 xo_docdir_doxygen_config()
-xo_docdir_sphinx_config(index.rst install.rst)
+xo_docdir_sphinx_config(
+    index.rst install.rst examples.rst implementation.rst
+    token-class.rst tokenizer-error-class.rst span-class.rst tokentype-enum.rst
+)
--- a/xo-tokenizer/docs/_static/README
+++ b/xo-tokenizer/docs/_static/README
@ -0,0 +1 @@
+add any static {.html, .js, ..} files for sphinx to pick up here
--- a/xo-tokenizer/docs/_static/img/favicon.ico
+++ b/xo-tokenizer/docs/_static/img/favicon.ico
--- a/xo-tokenizer/docs/conf.py
+++ b/xo-tokenizer/docs/conf.py
@ -0,0 +1,39 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'xo tokenizer documentation'
+copyright = '2024-2025, Roland Conybeare'
+author = 'Roland Conybeare'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+#extensions = []
+extensions = [ "breathe",
+               "sphinx.ext.mathjax",     # inline math
+               "sphinx.ext.autodoc",     # generate info from docstrings
+               "sphinxcontrib.ditaa",    # diagrams-through-ascii-art
+               "sphinxcontrib.plantuml"  # text -> uml diagrams
+              ]
+
+# note: breathe requires doxygen xml output -> must have GENERATE_XML = YES in Doxyfile.in
+#       match project name in Doxyfile.in
+breathe_default_project = "xodoxxml"
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+pygments_style = 'sphinx'
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+#html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
+html_static_path = ['_static']
+html_favicon = '_static/img/favicon.ico'
--- a/xo-tokenizer/docs/examples.rst
+++ b/xo-tokenizer/docs/examples.rst
@ -1,6 +1,6 @@
 .. _examples:

-.. toctree
+.. toctree::
   :maxdepth: 2

 Examples
@ -31,19 +31,28 @@ See ``xo-tokenizer/examples/tokenrepl`` for (slighly elaborated) version of code

            // input may contain multiple tokens
            while (!input.empty()) {
-                auto [tk, nread] = tkz.scan(input);
+                auto [tk, consumed, error] = tkz.scan(input);

                if (tk.is_valid()) {
                    cout << tk;
                }

-                input = input.after_prefix(nread);
+                input = input.after_prefix(consumed.size());
            }
        }

-        auto tk = tkz.notify_eof();
+        auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str));

        if (tk.is_valid()) {
            cout << tk;
        }
    }
+
+.. code-block:: text
+   :linenos:
+
+    $ .build/xo-tokenizer/utest/utest.tokenizer
+    > 123
+    <token :type tk_i64 :text 123>
+    > 123e5
+    <token :type tk_f64 :text 123e5>
--- a/xo-tokenizer/docs/implementation.rst
+++ b/xo-tokenizer/docs/implementation.rst
@ -0,0 +1,36 @@
+.. _implementation:
+
+.. toctree::
+   :maxdepth: 2
+
+Components
+==========
+
+Library dependency tower for *xo-tokenizer*:
+
+.. ditaa::
+
+    +-----------------+
+    |     xo_unit     |
+    +-----------------+
+    |  xo_indentlog   |
+    +-----------------+
+    |    xo_cmake     |
+    +-----------------+
+
+Install instructions :doc:`here<install>`
+
+Abstraction tower for *xo-tokenizer* components:
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
--- a/xo-tokenizer/docs/index.rst
+++ b/xo-tokenizer/docs/index.rst
@ -1,6 +1,6 @@
 .. xo-tokenizer documentation master file.

-xo-tokenizer documentation
+Xo-tokenizer documentation
 ==========================

 xo-tokenizer provides a tokenizer for the Schematika language.
@ -15,5 +15,8 @@ may appear in variable names:  ``one-of-those-days`` is an ordinary symbol.

   install
   examples
-   genindex
-   search
+   implementation
+   token-class
+   tokenizer-error-class
+   span-class
+   tokentype-enum
--- a/xo-tokenizer/docs/install.rst
+++ b/xo-tokenizer/docs/install.rst
@ -1,8 +1,23 @@
 .. _install:

-.. toctree
+.. toctree::
   :maxdepth: 2

+Source
+======
+
+Source code lives on GitHub `here`_
+
+.. _here: https://github.com/rconybea/xo-tokenizer
+
+To clone from git:
+
+.. code-block:: bash
+
+    git clone https://github.com/rconybea/xo-tokenizer
+
+Tested with gcc 13.3
+
 Install
 =======

--- a/xo-tokenizer/docs/span-class.rst
+++ b/xo-tokenizer/docs/span-class.rst
@ -0,0 +1,84 @@
+
+.. _span-class:
+
+Span
+====
+
+Identify an unowned contiguous memory range
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |cBLU      span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/span.hpp>
+
+.. uml::
+    :scale: 99%
+    :align: center
+
+    allowmixing
+
+    object span1<<span>>
+    span1 : lo = p
+    span1 : hi = p+25
+
+    object dest<<memory>>
+    dest : def fact(n : i64) { ... }
+
+
+- Identify a sequence of characters stored in contiguous memory.
+
+- Lightweight, consists of a pair of pointers.
+
+- Does not own storage. Lifetime management for target memory is
+  up to the caller.
+
+
+Class
+-----
+
+.. doxygenclass:: xo::scm::span
+
+Member Variables
+----------------
+
+.. doxygengroup:: span-instance-vars
+
+Type Traits
+-----------
+
+.. doxygengroup:: span-type-traits
+
+Constructors
+------------
+
+.. doxygengroup:: span-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: span-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: span-general-methods
+
+Operators
+---------
+
+.. doxygengroup:: span-operators
--- a/xo-tokenizer/docs/token-class.rst
+++ b/xo-tokenizer/docs/token-class.rst
@ -0,0 +1,94 @@
+
+.. _token-class:
+
+Token
+=====
+
+Represent a single lexical token in the Schematika language
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |cBLU token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/token.hpp>
+
+.. uml::
+    :scale: 99%
+    :align: center
+
+    allowmixing
+
+    object tk1<<token>>
+    tk1 : tk_type = tk_i64
+    tk1 : text = "123"
+
+    object tk2<<token>>
+    tk2 : tk_type = tk_string
+    tk2 : text = "the quick brown fox"
+
+- Represent a single lexical token
+
+- Does not share any storage with original input stream
+  (maintains a local copy).
+
+- Remembers copied input extent.
+  Convert on demand to native untagged representation
+
+Example
+-------
+
+.. code-block:: cpp
+
+    void foo() {
+        using namespace xo::scm;
+
+        token<char> tk = token<char>::i64_token("123");
+
+        tk.is_valid(); // -> true
+        tk.text(); // -> "123"s;
+
+        tk.tk_type(); // -> tokentype::tk_i64
+        tk.i64_value(); // -> 123
+
+        cout << tk << endl; // -> <token :type i64 :text 123>
+    }
+
+Class
+-----
+
+.. doxygenclass:: xo::scm::token
+
+
+Instance Variables
+------------------
+
+.. doxygengroup:: token-instance-vars
+
+Constructors
+------------
+
+.. doxygengroup:: token-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: token-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: token-general-methods
--- a/xo-tokenizer/docs/tokenizer-class.rst
+++ b/xo-tokenizer/docs/tokenizer-class.rst
@ -0,0 +1,27 @@
+
+.. _tokenizer-class:
+
+Tokenizer
+=========
+
+Parse a Schematika character stream into lexical tokens
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |cBLU            tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokenizer.hpp>
--- a/xo-tokenizer/docs/tokenizer-error-class.rst
+++ b/xo-tokenizer/docs/tokenizer-error-class.rst
@ -0,0 +1,52 @@
+
+.. _tokenizer-error-class:
+
+Tokenizer Error
+===============
+
+Represent a possible tokenizer error result, including parsing context
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |cBLU tokenizer_error   |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokenizer_error.hpp>
+
+Class
+------
+
+.. doxygenclass:: xo::scm::tokenizer_error
+
+Instance Variables
+------------------
+
+.. doxygengroup:: tokenizer-error-instance-vars
+
+Constructors
+------------
+
+.. doxygengroup:: tokenizer-error-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: tokenizer-error-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: tokenizer-error-general-methods
--- a/xo-tokenizer/docs/tokentype-enum.rst
+++ b/xo-tokenizer/docs/tokentype-enum.rst
@ -0,0 +1,34 @@
+
+.. _tokentype-enum:
+
+Tokentype
+=========
+
+Distinguish different lexical tokens for the Schematika language.
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |cBLU tokentype   |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokentype.hpp>
+
+Enum
+----
+
+.. doxygenfunction:: xo::scm::tokentype_descr
+
+.. doxygenfunction:: xo::scm::operator<<(std::ostream&,tokentype)
				`@ -0,0 +1 @@`
				`add any static {.html, .js, ..} files for sphinx to pickup here`