xo-tokenizer: docs + error-handling improvement

drop exceptions for return-value error object
2025-06-23 23:08:12 -05:00 · 2025-06-23 23:08:12 -05:00 · 6fbfd065a2
commit 6fbfd065a2
parent f9961a1e37
30 changed files with 1086 additions and 162 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -115,6 +115,6 @@ add_subdirectory(xo-pyjit)
 # ----------------------------------------------------------------
 # documentation.  must follow add_subdirectory() for satellite projects

-xo_umbrella_doxygen_deps(xo_flatstring xo_ratio)
+xo_umbrella_doxygen_deps(xo_flatstring xo_ratio xo_unit xo_tokenizer xo_jit)
 xo_umbrella_doxygen_config()
-xo_umbrella_sphinx_config(index.rst docs/install.rst)
+xo_umbrella_sphinx_config(index.rst docs/install.rst docs/glossary.rst)
--- a/cmake/xo-bootstrap-macros.cmake
+++ b/cmake/xo-bootstrap-macros.cmake
@ -0,0 +1,35 @@
+# ----------------------------------------------------------------
+# for example:
+#   $ PREFIX=/usr/local   # for example
+#   $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
+#
+# will get
+#   CMAKE_MODULE_PATH
+# from xo-cmake-config --cmake-module-path
+#
+# and expect .cmake macros in
+#   CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
+# ----------------------------------------------------------------
+
+find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)
+
+# find_program() stores <VAR>-NOTFOUND (which evaluates false) when lookup fails
+if (NOT XO_CMAKE_CONFIG_EXECUTABLE)
+    message(FATAL_ERROR "could not find xo-cmake-config executable")
+endif()
+
+message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}")
+
+if (NOT XO_SUBMODULE_BUILD)
+    # "prefix" is the placeholder value used in the example invocation above
+    if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL "prefix"))
+        # default to typical install location for xo-project-macros.
+        # strip trailing newline from tool output so the result is usable as a path
+        execute_process(COMMAND "${XO_CMAKE_CONFIG_EXECUTABLE}" --cmake-module-path
+                        OUTPUT_VARIABLE CMAKE_MODULE_PATH
+                        OUTPUT_STRIP_TRAILING_WHITESPACE)
+        message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
+    endif()
+endif()
+
+# needs to have been installed somewhere on CMAKE_MODULE_PATH,
+# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
+#
+include(xo_macros/xo_cxx)
+
+xo_cxx_bootstrap_message()
--- a/conf.py
+++ b/conf.py
@ -44,3 +44,11 @@ pygments_style = 'sphinx'
 html_theme = 'sphinx_rtd_theme'
 html_static_path = ['_static']
 html_favicon = '_static/img/favicon.ico'
+
+# disable caching (at least helpful in development)
+
+html_meta = {
+    'http-equiv=Cache-Control': 'no-cache, no-store, must-revalidate',
+    'http-equiv=Pragma': 'no-cache',
+    'http-equiv=Expires': '0'
+}
--- a/default.nix
+++ b/default.nix
@ -109,7 +109,7 @@ let
 #
          xo-expression     = self.callPackage pkgs/xo-expression.nix     {};
          xo-pyexpression   = self.callPackage pkgs/xo-pyexpression.nix   {};
-          xo-tokenizer      = self.callPackage pkgs/xo-tokenizer.nix      {};
+          xo-tokenizer      = self.callPackage pkgs/xo-tokenizer.nix      { buildDocs = true; };
          xo-reader         = self.callPackage pkgs/xo-reader.nix         {};

          xo-jit            = self.callPackage pkgs/xo-jit.nix            { #stdenv = jitStdenv;
@ -152,11 +152,18 @@ pkgs.mkShell {
    pkgs.python3Packages.python
    pkgs.python3Packages.pybind11
    pkgs.python3Packages.sphinx-rtd-theme
+    #pkgs.python3Packages.sphinx-autobuild   # needs patch for typeguard; defer for now
    pkgs.python3Packages.breathe
    pkgs.python3Packages.sphinxcontrib-ditaa
    pkgs.python3Packages.sphinxcontrib-plantuml
    pkgs.python3Packages.pillow

+    pkgs.gdb
+
+    pkgs.emacs
+    pkgs.ditaa
+    pkgs.ripgrep
+    pkgs.git
    pkgs.cloc

    pkgs.sphinx
@ -169,6 +176,8 @@ pkgs.mkShell {
    pkgs.eigen
    pkgs.cmake
    pkgs.catch2
+    pkgs.zlib
+    pkgs.unzip
  ];

  shellHook = ''
--- a/docs/glossary.rst
+++ b/docs/glossary.rst
@ -1 +1,12 @@
-scm = schematika
+.. _glossary:
+
+Glossary
+--------
+
+.. glossary::
+    schematika
+    scm
+      | Experimental programming language.
+      | Designed for convenient integration with C++ and python.
+
+.. toctree::
--- a/docs/install.rst
+++ b/docs/install.rst
@ -92,8 +92,11 @@ Aternatively can enter nix environment, then follow instructions for cmake build
    # etc


+Development
+===========
+
 LSP Setup
-=========
+---------

 To setup xo-umbrella2 build to work with a language server:

@ -105,3 +108,13 @@ To setup xo-umbrella2 build to work with a language server:

 In this case subsystem LSP setup should be omitted, git root is ``path/to/xo-umbrella2``,
 not ``path/to/xo-umbrella2/xo-ratio`` etc.
+
+Sphinx Autobuild Setup
+----------------------
+
+To serve cache-busting headers
+
+.. code-block::
+
+    $ cd xo-umbrella2
+    $ sphinx-autobuild . .build/sphinx/html --port 3000
--- a/index.rst
+++ b/index.rst
@ -17,5 +17,6 @@ Some features: kalman filters, stochastic processes, complex event processing, s
   xo-unit/docs/index
   xo-tokenizer/docs/index
   xo-jit/docs/index
+   glossary
   genindex
   search
--- a/xo-tokenizer/docs/CMakeLists.txt
+++ b/xo-tokenizer/docs/CMakeLists.txt
@ -1,5 +1,8 @@
-# xo-tokenizer/CMakeLists.txt
+# xo-tokenizer/docs/CMakeLists.txt

 xo_doxygen_collect_deps()
 xo_docdir_doxygen_config()
-xo_docdir_sphinx_config(index.rst install.rst)
+xo_docdir_sphinx_config(
+    index.rst install.rst examples.rst implementation.rst
+    token-class.rst tokenizer-error-class.rst span-class.rst tokentype-enum.rst
+)
--- a/xo-tokenizer/docs/_static/README
+++ b/xo-tokenizer/docs/_static/README
@ -0,0 +1 @@
+add any static {.html, .js, ..} files for sphinx to pickup here
--- a/xo-tokenizer/docs/_static/img/favicon.ico
+++ b/xo-tokenizer/docs/_static/img/favicon.ico
--- a/xo-tokenizer/docs/conf.py
+++ b/xo-tokenizer/docs/conf.py
@ -0,0 +1,39 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = 'xo tokenizer documentation'
+copyright = '2024-2025, Roland Conybeare'
+author = 'Roland Conybeare'
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+#extensions = []
+extensions = [ "breathe",
+               "sphinx.ext.mathjax",     # inline math
+               "sphinx.ext.autodoc",     # generate info from docstrings
+               "sphinxcontrib.ditaa",    # diagrams-through-ascii-art
+               "sphinxcontrib.plantuml"  # text -> uml diagrams
+              ]
+
+# note: breathe requires doxygen xml output -> must have GENERATE_XML = YES in Doxyfile.in
+#       match project name in Doxyfile.in
+breathe_default_project = "xodoxxml"
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+pygments_style = 'sphinx'
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+#html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
+html_static_path = ['_static']
+html_favicon = '_static/img/favicon.ico'
--- a/xo-tokenizer/docs/examples.rst
+++ b/xo-tokenizer/docs/examples.rst
@ -1,6 +1,6 @@
 .. _examples:

-.. toctree
+.. toctree::
   :maxdepth: 2

 Examples
@ -31,19 +31,28 @@ See ``xo-tokenizer/examples/tokenrepl`` for (slighly elaborated) version of code

            // input may contain multiple tokens
            while (!input.empty()) {
-                auto [tk, nread] = tkz.scan(input);
+                auto [tk, consumed, error] = tkz.scan(input);

                if (tk.is_valid()) {
                    cout << tk;
                }

-                input = input.after_prefix(nread);
+                input = input.after_prefix(consumed.size());
            }
        }

-        auto tk = tkz.notify_eof();
+        auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str));

        if (tk.is_valid()) {
            cout << tk;
        }
    }
+
+.. code-block::
+   :linenos:
+
+    $ .build/xo-tokenizer/utest/utest.tokenizer
+    > 123
+    <token :type tk_i64 :text 123>
+    > 123e5
+    <token :type tk_f64 :text 123e5>
--- a/xo-tokenizer/docs/implementation.rst
+++ b/xo-tokenizer/docs/implementation.rst
@ -0,0 +1,36 @@
+.. _implementation:
+
+.. toctree::
+   :maxdepth: 2
+
+Components
+==========
+
+Library dependency tower for *xo-tokenizer*:
+
+.. ditaa::
+
+    +-----------------+
+    |     xo_unit     |
+    +-----------------+
+    |  xo_indentlog   |
+    +-----------------+
+    |    xo_cmake     |
+    +-----------------+
+
+Install instructions :doc:`here<install>`
+
+Abstraction tower for *xo-tokenizer* components:
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
--- a/xo-tokenizer/docs/index.rst
+++ b/xo-tokenizer/docs/index.rst
@ -1,6 +1,6 @@
 .. xo-tokenizer documentation master file.

-xo-tokenizer documentation
+Xo-tokenizer documentation
 ==========================

 xo-tokenizer provides a tokenizer for the Schematika language.
@ -15,5 +15,8 @@ may appear in variable names:  ``one-of-those-days`` is an ordinary symbol.

   install
   examples
-   genindex
-   search
+   implementation
+   token-class
+   tokenizer-error-class
+   span-class
+   tokentype-enum
--- a/xo-tokenizer/docs/install.rst
+++ b/xo-tokenizer/docs/install.rst
@ -1,8 +1,23 @@
 .. _install:

-.. toctree
+.. toctree::
   :maxdepth: 2

+Source
+======
+
+Source code lives on GitHub `here`_
+
+.. _here: https://github.com/rconybea/xo-tokenizer
+
+To clone from git:
+
+.. code-block:: bash
+
+    git clone https://github.com/rconybea/xo-tokenizer
+
+Tested with gcc 13.3
+
 Install
 =======

--- a/xo-tokenizer/docs/span-class.rst
+++ b/xo-tokenizer/docs/span-class.rst
@ -0,0 +1,84 @@
+
+.. _span-class:
+
+Span
+====
+
+Identify an unowned contiguous memory range
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |cBLU      span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/span.hpp>
+
+.. uml::
+    :scale: 99%
+    :align: center
+
+    allowmixing
+
+    object span1<<span>>
+    span1 : lo = p
+    span1 : hi = p+25
+
+    object dest<<memory>>
+    dest : def fact(n : i64) { ... }
+
+
+- Identify a sequence of characters stored in contiguous memory.
+
+- Lightweight, consists of a pair of pointers.
+
+- Does not own storage. Lifetime management for target memory is
+  up to the caller.
+
+
+Class
+-----
+
+.. doxygenclass:: xo::scm::span
+
+Member Variables
+----------------
+
+.. doxygengroup:: span-instance-vars
+
+Type Traits
+-----------
+
+.. doxygengroup:: span-type-traits
+
+Constructors
+------------
+
+.. doxygengroup:: span-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: span-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: span-general-methods
+
+Operators
+---------
+
+.. doxygengroup:: span-operators
--- a/xo-tokenizer/docs/token-class.rst
+++ b/xo-tokenizer/docs/token-class.rst
@ -0,0 +1,94 @@
+
+.. _token-class:
+
+Token
+=====
+
+Represent a single lexical token in the Schematika language
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |cBLU token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/token.hpp>
+
+.. uml::
+    :scale: 99%
+    :align: center
+
+    allowmixing
+
+    object tk1<<token>>
+    tk1 : tk_type = tk_i64
+    tk1 : text = "123"
+
+    object tk2<<token>>
+    tk2 : tk_type = tk_string
+    tk2 : text = "the quick brown fox"
+
+- Represent a single lexical token
+
+- Does not share any storage with original input stream
+  (maintains a local copy).
+
+- Remembers copied input extent.
+  Convert on demand to native untagged representation
+
+Example
+-------
+
+.. code-block:: cpp
+
+    void foo() {
+        using namespace xo::scm;
+
+        token<char> tk = token<char>::i64_token("123");
+
+        tk.is_valid(); // -> true
+        tk.text(); // -> "123"s;
+
+        tk.tk_type(); // -> tokentype::tk_i64
+        tk.i64_value(); // -> 123
+
+        cout << tk << endl; // -> <token :type i64 :text 123>
+    }
+
+Class
+-----
+
+.. doxygenclass:: xo::scm::token
+
+
+Instance Variables
+------------------
+
+.. doxygengroup:: token-instance-vars
+
+Constructors
+------------
+
+.. doxygengroup:: token-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: token-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: token-general-methods
--- a/xo-tokenizer/docs/tokenizer-class.rst
+++ b/xo-tokenizer/docs/tokenizer-class.rst
@ -0,0 +1,27 @@
+
+.. _tokenizer-class:
+
+Tokenizer
+=========
+
+Parse a Schematika character stream into lexical tokens
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |cBLU            tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokenizer.hpp>
--- a/xo-tokenizer/docs/tokenizer-error-class.rst
+++ b/xo-tokenizer/docs/tokenizer-error-class.rst
@ -0,0 +1,52 @@
+
+.. _tokenizer-error-class:
+
+Tokenizer Error
+===============
+
+Represent a possible tokenizer error result, including parsing context
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |cBLU tokenizer_error   |          |
+    +-----------------+-----------------------+          |
+    |    tokentype    |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokenizer_error.hpp>
+
+Class
+------
+
+.. doxygenclass:: xo::scm::tokenizer_error
+
+Instance Variables
+------------------
+
+.. doxygengroup:: tokenizer-error-instance-vars
+
+Constructors
+------------
+
+.. doxygengroup:: tokenizer-error-ctors
+
+Access Methods
+--------------
+
+.. doxygengroup:: tokenizer-error-access-methods
+
+General Methods
+---------------
+
+.. doxygengroup:: tokenizer-error-general-methods
--- a/xo-tokenizer/docs/tokentype-enum.rst
+++ b/xo-tokenizer/docs/tokentype-enum.rst
@ -0,0 +1,34 @@
+
+.. _tokentype-enum:
+
+Tokentype
+=========
+
+Distinguish different lexical tokens for the Schematika language.
+
+Context
+-------
+
+.. ditaa::
+    :--scale: 0.85
+
+    +-----------------------------------------+----------+
+    |                tokenizer                |          |
+    +-----------------------------------------+          |
+    |               scan_result               |          |
+    +-----------------+-----------------------+  buffer  |
+    |     token       |    tokenizer_error    |          |
+    +-----------------+-----------------------+          |
+    |cBLU tokentype   |          span         |          |
+    +-----------------+-----------------------+----------+
+
+.. code-block:: cpp
+
+    #include <xo/tokenizer/tokentype.hpp>
+
+Enum
+----
+
+.. doxygenfunction:: xo::scm::tokentype_descr
+
+.. doxygenfunction:: xo::scm::operator<<(std::ostream&,tokentype)
--- a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp
+++ b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp
@ -41,21 +41,35 @@ main() {
            if (tk.is_valid()) {
                cout << tk << endl;
            } else if (error.is_error()) {
-                cout << "parsing error: " << error << endl;
-                /* discard remainder of input line */
+                cout << "parsing error: " << endl;
+                error.report(cout);
+
                break;
            }

-            input = input.after_prefix(consumed.size());
-        }
+            input = tkz.consume(consumed, input);
+            //input = input.after_prefix(consumed.size());
        }

-    auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str));
+        /* discard stashed remainder of input line
+         * (for nicely-formatted errors)
+         */
+        tkz.discard_current_line();
+    }
+
+    {
+        span_type input = span_type::from_string(input_str);
+
+        auto [tk, consumed, error] = tkz.notify_eof(input);
+
+        input = tkz.consume(consumed, input);

        if (tk.is_valid()) {
            cout << tk << endl;
        } else if (error.is_error()) {
-        cout << "parsing error: " << error << endl;
+            cout << "parsing error: " << endl;
+            error.report(cout);
+        }
    }
 }

--- a/xo-tokenizer/include/xo/tokenizer/error_token.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/error_token.hpp
--- a/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
@ -10,8 +10,10 @@

 namespace xo {
    namespace scm {
-        /** @brief Represent result of parsing one input token.
+        /** @class scan_result
+         *  @brief Represent result of parsing one input token.
         *
+         * @code
         *  Possible outcomes fall into several categories
         *  (with T: @c token_.is_valid(), E: @c error_.is_error())
         *
@ -21,6 +23,7 @@ namespace xo {
         *  | true  | false | parsed token in T |
         *  | false | true  | parse error in E  |
         *
+         * @endcode
         **/
        template <typename CharT>
        class scan_result {
@ -37,6 +40,7 @@ namespace xo {

            static scan_result make_whitespace(const span_type & prefix_input);
            static scan_result make_partial(const span_type & prefix_input);
+            static scan_result make_error(const error_type & error);

            bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
            bool is_token() const { return token_.is_valid(); }
@ -67,6 +71,12 @@ namespace xo {
            return scan_result(token_type::invalid(), prefix_input /*consumed*/);
        }

+        template <typename CharT>
+        auto scan_result<CharT>::make_error(const error_type & error) -> scan_result
+        {
+            return scan_result(token_type::invalid(), span_type::make_null(), error);
+        }
+
    } /*namespace scm*/
 } /*namespace xo*/

--- a/xo-tokenizer/include/xo/tokenizer/span.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/span.hpp
@ -11,21 +11,32 @@ namespace xo {
    namespace scm {
        /** @class span xo/tokenizer/span.hpp
         *
-         *  @brief Represents a contiguous memory range,  without ownership.
+         *  @brief A contiguous range of characters,  without ownership.
         *
         *  @tparam CharT type for elements referred to by this span.
         **/
        template <typename CharT>
        class span {
        public:
-            /** @brief typealias for span size (in units of CharT) **/
+            /** @defgroup span-type-traits span type traits **/
+            ///@{
+
+            /** typealias for span size (in units of CharT) **/
            using size_type = std::uint64_t;

+            ///@}
+
        public:
-            /** @brief create span for the contiguous memory range [@p lo, @p hi) **/
+            /** @defgroup span-ctors span constructors **/
+            ///@{
+
+            /** Create span for the contiguous memory range [@p lo, @p hi) **/
            span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}

-            /** @brief create a null span (i.e. with null @p lo, @p hi pointers) **/
+            /** Create a null span (i.e. with null @p lo, @p hi pointers)
+             *  A null span can be concatenated with any other span
+             *  without triggering matching-endpoint asserts.
+             **/
            static span make_null() { return span(nullptr, nullptr); }

            /** @brief create span for C-style string @p cstr **/
@ -65,16 +76,20 @@ namespace xo {
                return span(lo, hi);
            }

-            ///@{
+            ///@}

-            /** @name getters **/
+            /** @defgroup span-access-methods span access methods **/
+            ///@{

            CharT * lo() const { return lo_; } /* get member span::lo_ */
            CharT * hi() const { return hi_; } /* get member span::hi_ */

            ///@}

-            /** @brief create new span over supplied type,
+            /** @defgroup span-general-methods span general methods **/
+            ///@{
+
+            /** Create new span over supplied type,
             *  with identical (possibly misaligned) endpoints.
             *
             *  @warning
@ -121,7 +136,9 @@ namespace xo {
                return after_prefix(prefix.size());
            }

-            /** @brief create span starting with position p **/
+            /** Create span starting with position @p p.
+             *  Does boundary checking; will return empty span if @p p is outside @c [lo_,hi_]
+             **/
            span suffix_from(CharT * p) const {
                if ((lo_ <= p) && (p <= hi_))
                    return span(p, hi_);
@ -129,13 +146,16 @@ namespace xo {
                    return span(hi_, hi_);
            }

-            /** @brief true iff this span is null.  distinct from empty. **/
+            /** true iff this span is null.  distinct from empty. **/
            bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
-            /** @brief true iff this span is empty (comprises 0 elements). **/
+            /** true iff this span is empty (comprises 0 elements). **/
            bool empty() const { return lo_ == hi_; }
-            /** @brief report the number of elements (of type CharT) in this span. **/
+            /** report the number of elements (of type CharT) in this span. **/
            size_type size() const { return hi_ - lo_; }

+            /** increase extent of this span to include @p x.
+             *  Requires @c hi() == @c x.lo()
+             **/
            span & operator+=(const span & x) {
                if (hi_ == x.lo_) {
                    hi_ = x.hi_;
@ -154,15 +174,18 @@ namespace xo {
                   << " :text " << xo::print::quot(std::string_view(lo_, hi_))
                   << ">";
            }
+            ///@}

        private:
+            /** @defgroup span-instance-vars span instance variables **/
            ///@{

-            /** @brief start of span
+            /** start of span.
                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
            **/
            CharT * lo_ = nullptr;
-            /** @brief end of span
+
+            /** @brief end of span.
                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
            **/
            CharT * hi_ = nullptr;
@ -170,6 +193,12 @@ namespace xo {
            ///@}
        }; /*span*/

+        /** @defgroup span-operators span operators **/
+        ///@{
+
+        /** compare spans for equality.
+         *  Two spans are equal iff both endpoints match exactly.
+         **/
        template <typename CharT>
        inline bool
        operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
@ -177,6 +206,9 @@ namespace xo {
                    && (lhs.hi() == rhs.hi()));
        }

+        /** compare spans for inequality.
+         *  Two spans are unequal if either paired endpoint differs.
+         **/
        template <typename CharT>
        inline bool
        operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
@ -184,6 +216,7 @@ namespace xo {
                    || (lhs.hi() != rhs.hi()));
        }

+        /** print a summary of @p x on stream @p os. Intended for diagnostics **/
        template <typename CharT>
        inline std::ostream &
        operator<<(std::ostream & os,
@ -191,5 +224,33 @@ namespace xo {
            x.print(os);
            return os;
        }
+
+        ///@}
    } /*namespace scm*/
+
+    namespace print {
+        template <typename CharT>
+        class printspan_impl {
+        public:
+            printspan_impl(xo::scm::span<CharT> x) : span_{x} {}
+
+            xo::scm::span<CharT> span_;
+        };
+
+        template <typename CharT>
+        printspan_impl<CharT> printspan(const xo::scm::span<CharT>& span) {
+            return printspan_impl<CharT>(span);
+        }
+
+        template <typename CharT>
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const printspan_impl<CharT> & x)
+        {
+            for (const CharT * p = x.span_.lo(); p < x.span_.hi(); ++p)
+                os << *p;
+
+            return os;
+        }
+    }
 } /*namespace xo*/
--- a/xo-tokenizer/include/xo/tokenizer/token.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/token.hpp
@ -43,75 +43,137 @@ namespace xo {
            }
        }

+        /** @class token
+         *  @brief Represent a Schematika lexical token
+         **/
        template <typename CharT>
        class token {
        public:
+            /** @defgroup token-ctors token constructors **/
+            ///@{
+
+            /** default ctor creates token with type @c tk_invalid **/
            token() = default;
+            /** create token with type @c tk_type and input text @c text **/
            token(tokentype tk_type, const std::string & text = "")
                : tk_type_{tk_type}, text_{text} {}

+            /** create invalid token (same as null ctor, but explicit) **/
            static token invalid() { return token(); }
+            /** Create token representing 64-bit signed integer literal parsed from decimal @p txt.
+             *  The string @p txt must be a decimal integer literal, since @ref i64_value re-parses @p txt.
+             **/
            static token i64_token(const std::string & txt) {
                return token(tokentype::tk_i64, txt);
            }
+            /** create token representing 64-bit floating-point literal parsed from decimal @p txt
+             *  The string @p txt must be a decimal floating-point literal, since @ref f64_value re-parses @p txt.
+             **/
            static token f64_token(const std::string & txt) {
                return token(tokentype::tk_f64, txt);
            }
+            /** create token representing literal string parsed from @p txt **/
            static token string_token(const std::string & txt) {
                return token(tokentype::tk_string, txt);
            }
+            /** create token representing a symbol parsed from @p txt.
+             *  Note that not all strings are valid symbol names.
+             **/
            static token symbol_token(const std::string & txt) {
                return token(tokentype::tk_symbol, txt);
            }
+            /** token representing left angle bracket @c "<" **/
            static token leftangle() { return token(tokentype::tk_leftangle); }
+            /** token representing right angle bracket @c ">" **/
            static token rightangle() { return token(tokentype::tk_rightangle); }
+            /** token representing left parenthesis @c "(" **/
            static token leftparen() { return token(tokentype::tk_leftparen); }
+            /** token representing right parenthesis @c ")" **/
            static token rightparen() { return token(tokentype::tk_rightparen); }
+            /** token representing left bracket @c "[" **/
            static token leftbracket() { return token(tokentype::tk_leftbracket); }
+            /** token representing right bracket @c "]" **/
            static token rightbracket() { return token(tokentype::tk_rightbracket); }
+            /** token representing left brace @c "{" **/
            static token leftbrace() { return token(tokentype::tk_leftbrace); }
+            /** token representing right brace @c "}" **/
            static token rightbrace() { return token(tokentype::tk_rightbrace); }
+            /** token representing period @c "." **/
            static token dot() { return token(tokentype::tk_dot); }
+            /** token representing comma @c "," **/
            static token comma() { return token(tokentype::tk_comma); }
+            /** token representing colon @c ":" **/
            static token colon() { return token(tokentype::tk_colon); }
+            /** token representing double-colon @c "::" **/
            static token doublecolon() { return token(tokentype::tk_doublecolon); }
+            /** token representing semicolon @c ";" **/
            static token semicolon() { return token(tokentype::tk_semicolon); }
+            /** token representing single-assignment @c "=" **/
            static token singleassign() { return token(tokentype::tk_singleassign); }
+            /** token representing unrestricted assignment @c ":=" **/
            static token assign_token() { return token(tokentype::tk_assign); }
+            /** token representing indirection @c "->" **/
            static token yields() { return token(tokentype::tk_yields); }

+            /** token for @c "+" **/
            static token plus_token() { return token(tokentype::tk_plus); }
+            /** token for @c "-" **/
            static token minus_token() { return token(tokentype::tk_minus); }
+            /** token for @c "*" **/
            static token star_token() { return token(tokentype::tk_star); }
+            /** token for @c "/" **/
            static token slash_token() { return token(tokentype::tk_slash); }

+            /** token representing keyword @c type **/
            static token type() { return token(tokentype::tk_type); }
+            /** token representing keyword @c def **/
            static token def() { return token(tokentype::tk_def); }
+            /** token representing keyword @c lambda **/
            static token lambda() { return token(tokentype::tk_lambda); }
+            /** token representing keyword @c if **/
            static token if_token() { return token(tokentype::tk_if); }
+            /** token representing keyword @c let **/
            static token let() { return token(tokentype::tk_let); }
+            /** token representing keyword @c in **/
            static token in() { return token(tokentype::tk_in); }
+            /** token representing keyword @c end **/
            static token end() { return token(tokentype::tk_end); }

+            ///@}
+
+            /** @defgroup token-access-methods token access methods **/
+            ///@{
+
            tokentype tk_type() const { return tk_type_; }
            const std::string & text() const { return text_; }

+            ///@}
+
+            /** @defgroup token-general-methods token general methods **/
+            ///@{
+
+            /** true if token understood to represent valid input
+             *  i.e. any token type except @c tk_invalid
+             **/
            bool is_valid() const { return tk_type_ != tokentype::tk_invalid; }
+            /** true for sentinel token with type tk_invalid **/
            bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; }

-            /** expect input matching
-             *    [+|-][0-9][0-9]*
-             **/
+            /** expect input matching @c "[+|-][0-9][0-9]*" **/
            std::int64_t i64_value() const;
-            /** expect input matching
-             *    [+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]*
-             **/
+
+            /** expect input matching @c "[+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]*" **/
            double f64_value() const;

            /** print human-readable token representation on stream @p os **/
            void print(std::ostream & os) const;

+            ///@}
+
        private:
+            /** @defgroup token-instance-vars token instance variables **/
+            ///@{
+
            /** category for this token **/
            tokentype tk_type_ = tokentype::tk_invalid;

@ -124,6 +186,8 @@ namespace xo {
             *    tk_symbol
             **/
            std::string text_;
+
+            ///@}
        }; /*token*/

        template <typename CharT>
--- a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
@ -13,9 +13,15 @@

 namespace xo {
    namespace scm {
-        /**
+        /** @class tokenizer
+         *  @brief Parse a Schematika character stream into lexical tokens
+         *
         *  Use:
+         *
         *  @code
+         *    // see xo-tokenizer/example/tokenrepl/tokenrepl.cpp
+         *    // for exact working code
+         *
         *    using tokenizer_type = tokenizer<char>;
         *    using span_type = tokenizer_type::span_type;
         *
@ -24,21 +30,19 @@ namespace xo {
         *
         *    while (!input.empty()) {
         *        auto res = tkz.scan(input);
-         *        const auto & tk = res.first;
+         *        auto [tk, consumed, error] = res.first;
         *
         *        // do something with tk if tk.is_valid()
         *
-         *        input = input.after_prefix(res.second);
+         *        input = tkz.consume(res.second, input);
         *    }
         *
         *    if endofinput {
-         *        auto tk = tzk.notify_eof()
+         *        auto [tk, consumed, error] = tkz.notify_eof(input);
         *
-         *        // do something with tk if tk.is_valid()
+         *        // do something with (final) tk if tk.is_valid()
         *    }
         *
-         *    // expect !tkz.has_prefix()
-         *
         *  @endcode
         *
         * See tokentype.hpp for token types
@ -47,6 +51,7 @@ namespace xo {
        class tokenizer {
        public:
            using token_type = token<CharT>;
+            using error_type = tokenizer_error<CharT>;
            using span_type = span<const CharT>;
            using result_type = scan_result<CharT>;

@ -122,11 +127,22 @@ namespace xo {
             **/
            result_type scan2(const span_type & input, bool eof);

+            /** @return span with @p consumed permanently removed from @p input.
+             *
+             *  Purpose of this method is to update @ref current_pos_.
+             **/
+            span_type consume(const span_type & consumed, const span_type & input);
+
+            /** discard current line after error.  Just cleans up error-reporting state **/
+            void discard_current_line();
+
            /** notify end of input,  resolving any ambiguous input stashed in .prefix
             **/
            result_type notify_eof(const span_type & input);

        private:
+            void capture_current_line(const span_type & input);
+
            result_type scan_completion(const span_type & whitespace,
                                        const CharT* token_end,
                                        const span_type & input);
@ -134,8 +150,10 @@ namespace xo {
        private:
            /** true to log tokenizer activity to stdout **/
            bool debug_flag_ = false;
-            /** remember start of current line here **/
+            /** remember current input line.  Used only to report errors **/
            span_type current_line_ = span_type::make_null();
+            /** current input position within @ref current_line_ **/
+            size_t current_pos_ = 0;
            /** Accumulate partial token here.
             *  This will happen if input sent to @ref tokenizer::scan
             *  ends without a determinate token boundary.
@ -348,29 +366,35 @@ namespace xo {
                            } else if (exponent_flag && !exponent_digit_flag) {
                                exponent_sign_flag = true;
                            } else {
-                                throw std::runtime_error
-                                    (tostr("tokenizer::assemble_token",
-                                           ": improperly placed sign indicator",
-                                           xtag("pos", ix - tk_start),
-                                           xtag("char", *ix)));
+                                return result_type::make_error
+                                    (error_type(__FUNCTION__ /*src_function*/,
+                                                "improperly placed sign indicator",
+                                                current_line_,
+                                                current_pos_,
+                                                initial_whitespace,
+                                                (ix - tk_start)));
                            }
                        } else if (*ix == '.') {
                            if (period_flag) {
-                                throw (std::runtime_error
-                                       (tostr("tokenizer::assemble_token",
-                                              ": duplicate decimal point",
-                                              xtag("pos", ix - tk_start),
-                                              xtag("char", *ix))));
+                                return result_type::make_error
+                                    (error_type(__FUNCTION__ /*src_function*/,
+                                                "duplicate decimal point in numeric literal",
+                                                current_line_,
+                                                current_pos_,
+                                                initial_whitespace,
+                                                (ix - tk_start)));
                            }

                            period_flag = true;
                        } else if ((*ix == 'e') || (*ix == 'E')) {
                            if (exponent_flag) {
-                                throw (std::runtime_error
-                                       (tostr("tokenizer::assemble_token",
-                                              ": duplicate exponent marker",
-                                              xtag("pos", ix - tk_start),
-                                              xtag("char", *ix))));
+                                return result_type::make_error
+                                    (error_type(__FUNCTION__ /*src_function*/,
+                                                "duplicate exponent marker in numeric literal",
+                                                current_line_,
+                                                current_pos_,
+                                                initial_whitespace,
+                                                (ix - tk_start)));
                            }

                            exponent_flag = true;
@ -382,12 +406,13 @@ namespace xo {
                                number_flag = true;
                            }
                        } else {
-                            /* invalid input */
-                            throw (std::runtime_error
-                                   (tostr("tokenizer::assemble_token",
-                                          ": unexpected character in numeric constant",
-                                          xtag("pos", ix - tk_start),
-                                          xtag("char", *ix))));
+                            return result_type::make_error
+                                (error_type(__FUNCTION__ /*src_function*/,
+                                            "unexpected character in numeric constant" /*error_description*/,
+                                            current_line_,
+                                            current_pos_,
+                                            initial_whitespace,
+                                            (ix - tk_start)));
                        }
                    }

@ -443,11 +468,12 @@ namespace xo {

                ++ix; /*skip initial " char*/

+                /* true on final " */
+                bool endofstring = false;
+
                for (; ix != token_text.hi(); ++ix) {
                    log && log(xtag("*ix", *ix));

-                    bool endofstring = false;
-
                    switch(*ix) {
                    case '"':
                        endofstring = true;
@ -461,11 +487,13 @@ namespace xo {
                        ++ix;

                        if (ix == token_text.hi()) {
-                            throw std::runtime_error
-                                (tostr("tokenizer::assemble_token",
-                                       ": malformed string literal",
-                                       xtag("input", std::string_view(token_text.lo(),
-                                                                      token_text.hi()))));
+                            return result_type::make_error
+                                (error_type(__FUNCTION__ /*src_function*/,
+                                            "expecting key following escape character \\",
+                                            current_line_,
+                                            current_pos_,
+                                            initial_whitespace,
+                                            (ix - tk_start)));
                        }

                        switch(*ix) {
@ -490,10 +518,13 @@ namespace xo {
                            tk_text.push_back('"');
                            break;
                        default:
-                            throw std::runtime_error
-                                (tostr("tokenizer::assemble_token",
-                                       ": unexpected \\-escaped char",
-                                       xtag("char", *ix)));
+                            return result_type::make_error
+                                (error_type(__FUNCTION__ /*src_function*/,
+                                            "expecting one of n|r|\"|\\ following escape \\",
+                                            current_line_,
+                                            current_pos_,
+                                            initial_whitespace,
+                                            (ix - tk_start)));
                        }
                        break;
                    default:
@ -505,12 +536,14 @@ namespace xo {
                        break;
                }

-                if (ix != token_text.hi()) {
-                    throw std::runtime_error
-                        (tostr("tokenizer::assemble_token",
-                               ": expected \" to end string literal",
-                               xtag("input", std::string_view(token_text.lo(),
-                                                              token_text.hi()))));
+                if (!endofstring) {
+                    return result_type::make_error
+                        (error_type(__FUNCTION__ /*src_function*/,
+                                    "missing terminating '\"' to complete literal string",
+                                    current_line_,
+                                    current_pos_,
+                                    initial_whitespace,
+                                    (ix - tk_start)));
                }

                log && log(tostr("tokenizer::assemble_token",
@ -632,9 +665,13 @@ namespace xo {
            }

            if (tk_type == tokentype::tk_invalid) {
-                throw std::runtime_error(tostr("tokenizer::assemble_token",
-                                               ": unexpected input x",
-                                               xtag("x", *ix)));
+                return result_type::make_error
+                    (error_type(__FUNCTION__ /*src_function*/,
+                                "illegal input character",
+                                current_line_,
+                                current_pos_,
+                                initial_whitespace,
+                                (ix - tk_start)));
            }

            if ((tk_type == tokentype::tk_i64)
@ -719,6 +756,27 @@ namespace xo {

        }

+        template <typename CharT>
+        void
+        tokenizer<CharT>::capture_current_line(const span_type & input)
+        {
+            /* counterpart of discard_current_line() */
+
+            scope log(XO_DEBUG(debug_flag_));
+
+            /* line starts at beginning of input; advance until first newline
+             * or end of input, whichever is reached first
+             */
+            const CharT * const line_lo = input.lo();
+            const CharT * line_hi = line_lo;
+
+            for (; line_hi < input.hi(); ++line_hi) {
+                if (*line_hi == '\n')
+                    break;
+            }
+
+            /* remembered line excludes the newline; position restarts at line start */
+            this->current_pos_ = 0;
+            this->current_line_ = span_type(line_lo, line_hi);
+
+            log && log(xtag("current_line", print::printspan(current_line_)));
+        }
+
        template <typename CharT>
        auto
        tokenizer<CharT>::scan(const span_type & input) -> result_type
@ -729,21 +787,22 @@ namespace xo {

            const CharT * ix = input.lo();

+            if (this->current_line_.is_null()) {
+                this->capture_current_line(input);
+            }
+
            /* skip whitespace + remember beginning of most recent line.
             * bounds check comes first so *ix is never read at input.hi()
             * (e.g. empty or all-whitespace input)
             */
            while ((ix != input.hi()) && is_whitespace(*ix)) {
-
                if (is_newline(*ix)) {
                    ++ix;
-                    /* look ahead to {end of line, end of input}, whichever comes first */
-                    const CharT * sol = ix;
-                    const CharT * eol = ix;

-                    while ((eol < input.hi()) && (*eol != '\n'))
-                        ++eol;
-
-                    this->current_line_ = span_type(sol, eol);
+                    this->capture_current_line(span_type(ix, input.hi()));
                } else {
                    ++ix;
+
+#ifdef OBSOLETE
+                    ++(this->current_pos_);
+#endif
                }
            }

@ -818,10 +877,12 @@ namespace xo {
                            break;
                        }
                    } else if ((*ix == '\n') || (*ix == '\r')) {
-                        throw std::runtime_error
-                            (tostr("tokenizer::scan",
-                                   ": must use \\n or \\r to encode newline/cr in"
-                                   " string literal"));
+                        return result_type::make_error
+                            (error_type(__FUNCTION__ /*src_function*/,
+                                        "must use \\n or \\r to encode newline/cr in string literal",
+                                        current_line_, current_pos_,
+                                        whitespace.size(),
+                                        (ix - tk_start)));
                    }

                    prev_ch = *ix;
@ -945,6 +1006,25 @@ namespace xo {
                               sr2.error());
        }

+        template <typename CharT>
+        auto
+        tokenizer<CharT>::consume(const span_type & consumed, const span_type & input) -> span_type
+        {
+            /* advance position within current line by the number of chars consumed */
+            this->current_pos_ = this->current_pos_ + consumed.size();
+
+            /* whatever follows the consumed prefix is what remains to be scanned */
+            span_type remainder = input.after_prefix(consumed);
+
+            return remainder;
+        }
+
+        template <typename CharT>
+        void
+        tokenizer<CharT>::discard_current_line()
+        {
+            /* counterpart of capture_current_line():
+             * a null current line makes the next scan() capture a fresh one
+             */
+            this->current_pos_ = 0;
+            this->current_line_ = span_type::make_null();
+        }
+
        template <typename CharT>
        auto
        tokenizer<CharT>::notify_eof(const span_type & input) -> result_type {
--- a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
@ -7,47 +7,95 @@

 #include "tokentype.hpp"
 #include "span.hpp"
+#include <iomanip>

 namespace xo {
    namespace scm {
-        /** represent a lexing error, with context **/
+        /** @class tokenizer_error
+         *  @brief represent a lexing error, with context
+         *
+         *  @tparam CharT  representation for single characters
+         **/
        template <typename CharT>
        class tokenizer_error {
        public:
            using span_type = span<const CharT>;

        public:
-            /** @brief default ctor represent a not-an-error error object **/
+            /** @defgroup tokenizer-error-ctors **/
+            ///@{
+
+            /** Default ctor represent a not-an-error sentinel object **/
            tokenizer_error() = default;
-            tokenizer_error(char const * src_function,
-                            char const* error_description,
-                            span_type input_line, size_t error_pos)
+            /** Constructor to capture parsing error context
+             *
+             *  @param src_function       source location (in tokenizer) at which error identified
+             *  @param error_description  static error description; pointer is stored, not copied.
+             *                            A null description is what marks the sentinel
+             *                            (not-an-error) state
+             *  @param input_line         input line containing the error
+             *  @param tk_start           current position on entry to scanner
+             *  @param whitespace         number of chars initial whitespace
+             *  @param error_pos          error location relative to token start
+             **/
+            tokenizer_error(const char * src_function,
+                            const char * error_description,
+                            span_type input_line,
+                            size_t tk_start,
+                            size_t whitespace,
+                            size_t error_pos)
                : src_function_{src_function},
                  error_description_{error_description},
                  input_line_{input_line},
+                  tk_entry_{tk_start},
+                  whitespace_{whitespace},
                  error_pos_{error_pos} {}
+            ///@}

-            char const* src_function() const { return src_function_; }
-            char const* error_description() const { return error_description_; }
-            size_t error_pos() const { return error_pos_; }
+            /** @defgroup tokenizer-error-access-methods **/
+            ///@{
+
+            const char * src_function() const { return src_function_; }
+            const char * error_description() const { return error_description_; }
            const span_type& input_line() const { return input_line_; }
+            size_t tk_start() const { return tk_entry_; }
+            size_t whitespace() const { return whitespace_; }
+            size_t error_pos() const { return error_pos_; }

-            bool is_not_an_error() const { return error_description_ == nullptr; }
+            ///@}
+
+            /** @defgroup tokenizer-error-general-methods **/
+            ///@{
+
+            /** true, except for a sentinel error object **/
            bool is_error() const { return error_description_ != nullptr; }
+            /** true only for an object in sentinel state (no error description recorded) **/
+            bool is_not_an_error() const { return error_description_ == nullptr; }

+            /** Print representation to stream @p os. Intended for tokenizer diagnostics.
+             *  For Schematika errors prefer @ref report
+             **/
            void print(std::ostream & os) const;

+            /** Print human-oriented error report on @p os. **/
+            void report(std::ostream & os) const;
+
+            ///@}
+
        private:
+            /** @defgroup tokenizer-error-instance-vars **/
+            ///@{
+
            /** source location (in tokenizer) at which error identified **/
            char const * src_function_ = nullptr;
            /** static error description **/
            char const * error_description_ = nullptr;
-            /** position (relative to line_.lo) of error **/
-            size_t error_pos_ = 0;
-            /** complete input line (to the extent available)
-             *  containing error
+            /** complete current input line (to the extent captured)
+             *  that contains error
             **/
            span_type input_line_ = span_type::make_null();
+            /** position (relative to input_line_.lo) of token start where error encountered **/
+            size_t tk_entry_ = 0;
+            /** number of characters of initial whitespace skipped before token start **/
+            size_t whitespace_ = 0;
+            /** position (relative to @ref tk_entry_) of error **/
+            size_t error_pos_ = 0;
+
+            ///@}
        }; /*error_token*/

        template <typename CharT>
@ -56,11 +104,41 @@ namespace xo {
            os << "<tokenizer-error"
               << xtag("src-function", src_function_)
               << xtag("message", error_description_)
-               << xtag("error-pos", error_pos_)
               << xtag("input", input_line_)
+               << xtag("whitespace", whitespace_)
+               << xtag("tk-start", tk_entry_)
+               << xtag("error-pos", error_pos_)
               << ">";
        }

+        template <typename CharT>
+        void
+        tokenizer_error<CharT>::report(std::ostream & os) const {
+            using namespace std;
+
+            if (error_description_) {
+                const char * prefix = "input: ";
+                const size_t tk_indent = strlen(prefix) + tk_entry_ + whitespace_;
+                //const size_t msg_length = strlen(error_description_);
+
+                const size_t error_pos = 1 + tk_entry_ + whitespace_ + error_pos_;
+
+                os << "char: " << error_pos << endl;
+                os << prefix;
+                for (const char *p = input_line_.lo(), *e = input_line_.hi(); p < e; ++p)
+                    os << *p;
+                os << endl;
+                os << std::setw(tk_indent) << " ";
+
+                for (size_t i = 0; i < error_pos_; ++i) {
+                    os << '_';
+                }
+                os << '^' << endl;
+
+                os << error_description_ << endl;
+            }
+        }
+
        template <typename CharT>
        inline std::ostream &
        operator<< (std::ostream & os,
--- a/xo-tokenizer/include/xo/tokenizer/tokentype.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokentype.hpp
@ -11,10 +11,11 @@
 namespace xo {
    namespace scm {
        /** @enum tokentype
-         *  @brief enum to identify different schematica input token types
+         *  Enum to identify different schematika input token types
         *
         *  Schematika code examples:
         *
+         *  @code
         *    type point :: { xcoord : f64, ycoord : f64 };
         *    type matrix :: array<double, 2>;  // 2-d array
         *
@ -41,6 +42,7 @@ namespace xo {
         *    def matrixproduct(x : matrix, y : matrix) {
         *      [i, j : x.row(i) * y.col(j)];
         *    };
+         *  @endcode
         **/
        enum class tokentype {
            /** sentinel value **/
@ -58,52 +60,52 @@ namespace xo {
            /** a symbol **/
            tk_symbol,

-            /** left-hand parenthesis '(' **/
+            /** left-hand parenthesis @c '(' **/
            tk_leftparen,

-            /** right-hand parenthesis ')' **/
+            /** right-hand parenthesis @c ')' **/
            tk_rightparen,

-            /** left-hand bracket '[' **/
+            /** left-hand bracket @c '[' **/
            tk_leftbracket,

-            /** right-hand bracket ']' **/
+            /** right-hand bracket @c ']' **/
            tk_rightbracket,

-            /** left-hand brace '{' **/
+            /** left-hand brace @c '{' **/
            tk_leftbrace,

-            /** right-hand brace '}' **/
+            /** right-hand brace @c '}' **/
            tk_rightbrace,

-            /** left-hand angle bracket '<' **/
+            /** left-hand angle bracket @c '<' **/
            tk_leftangle,

-            /** right-hand angle bracket '>' **/
+            /** right-hand angle bracket @c '>' **/
            tk_rightangle,

-            /** dot '.' **/
+            /** dot @c '.' **/
            tk_dot,

-            /** comma ',' **/
+            /** comma @c ',' **/
            tk_comma,

-            /** colon ':' **/
+            /** colon @c ':' **/
            tk_colon,

-            /** double-colon '::' **/
+            /** double-colon @c '::' **/
            tk_doublecolon,

-            /** semi-colon ';' **/
+            /** semi-colon @c ';' **/
            tk_semicolon,

-            /** '=' **/
+            /** single equals sign @c '=' **/
            tk_singleassign,

-            /** ':=' **/
+            /** assignment @c ':=' **/
            tk_assign,

-            /** '->' **/
+            /** indirection @c '->' **/
            tk_yields,

            /** note: operators not treated as punctuation
@ -111,47 +113,53 @@ namespace xo {
             *  as is 'maybe*2', 'maybe+1', 'path/to/foo'
             **/

-            /** operator '+' **/
+            /** operator @c '+' **/
            tk_plus,
-            /** operator '-' **/
+            /** operator @c '-' **/
            tk_minus,
-            /** operator '*' **/
+            /** operator @c '*' **/
            tk_star,
-            /** operator '/' **/
+            /** operator @c '/' **/
            tk_slash,

-            /** keyword 'type' **/
+            /** keyword @c 'type' **/
            tk_type,

-            /** keyword 'def' **/
+            /** keyword @c 'def' **/
            tk_def,

-            /** keyword 'lambda' **/
+            /** keyword @c 'lambda' **/
            tk_lambda,

-            /** keyword 'if' **/
+            /** keyword @c 'if' **/
            tk_if,

-            /** keyword 'let' **/
+            /** keyword @c 'let' **/
            tk_let,

-            /** keyword 'in' **/
+            /** keyword @c 'in' **/
            tk_in,

-            /** keyword 'end' **/
+            /** keyword @c 'end' **/
            tk_end,

-            n_tokentype /* comes last, counts #of entries */
+            /** counts number of entries **/
+            n_tokentype
        }; /*tokentype*/

+        /** String representation for enum value.
+         *  For example @c tokentype_descr(tokentype::tk_if) -> @c "if"
+         **/
        extern char const *
        tokentype_descr(tokentype tk_type);

+        /** Stream inserter: writes @c tokentype_descr(tk_type) to @p os, returns @p os **/
        inline std::ostream &
        operator<< (std::ostream & os, tokentype tk_type) {
            return os << tokentype_descr(tk_type);
        }
+
    } /*namespace scm*/
 } /*namespace xo*/

--- a/xo-tokenizer/utest/tokenizer.test.cpp
+++ b/xo-tokenizer/utest/tokenizer.test.cpp
@ -19,15 +19,17 @@ namespace xo {
         *   On second pass, enable verbose logging
         **/
        struct rehearser {
+            rehearser(std::uint32_t att = 0) : attention_{att} {}
+
            /* expect at most one iterator to exist per TestRehearser instance **/
            struct iterator {
-                iterator(rehearser* parent, std::uint32_t attention) : parent_{parent}, attention_{attention} {}
+                explicit iterator(rehearser* parent) : parent_{parent} {}

                iterator& operator++();
-                std::uint32_t operator*() { return attention_; }
+                std::uint32_t operator*() { return parent_->attention_; }

                bool operator==(const iterator& ix2) const {
-                    return (parent_ == ix2.parent_) && (attention_ == ix2.attention_);
+                    return (parent_ == ix2.parent_);
                }

                rehearser* parent_ = nullptr;
@ -35,11 +37,12 @@ namespace xo {

            };

+            bool is_first_pass() const { return attention_ == 0; }
            bool is_second_pass() const { return attention_ == 1; }
            bool enable_debug() const { return is_second_pass(); }

-            iterator begin() { return iterator(this, 0); }
-            iterator end()   { return iterator(this, 2); }
+            iterator begin() { return iterator(this); }
+            iterator end()   { return iterator(nullptr); }

        public:
            /** pass number: 0 or 1 **/
@ -50,23 +53,27 @@ namespace xo {

        auto rehearser::iterator::operator++() -> iterator&
        {
-            ++attention_;
+            /* end() iterator has no parent: incrementing it is a no-op
+             * (previously only the first increment was guarded)
+             */
+            if (!parent_)
+                return *this;
+
+            ++(parent_->attention_);

-            if (parent_->ok_flag_ && attention_ == 1) {
+            if (parent_->ok_flag_ && (parent_->attention_ == 1)) {
                /* skip 2nd pass */
-                ++attention_;
+                ++(parent_->attention_);
            }

+            /* no passes remaining: drop parent so this compares equal to end() */
+            if (parent_->attention_ == 2)
+                parent_ = nullptr;
+
            return *this;
        }

        /* use this instead of REQUIRE(expr) in context of a test_rehearser */
 #      define REHEARSE(rehearser, expr)                    \
-        if (rehearser.is_second_pass()) {         \
-            REQUIRE((expr));                      \
+        if (rehearser.is_first_pass()) {                   \
+            bool _f = (expr);                              \
+            rehearser.ok_flag_ = rehearser.ok_flag_ && _f; \
        } else {                                           \
-            REQUIRE(true);                        \
-            rehearser.ok_flag_ &= (expr);         \
+            REQUIRE(expr);                                 \
        }

        /* note: trivial REQUIRE() call in else branch bc we still want
@ -300,12 +307,14 @@ namespace xo {
                  token::semicolon(),
                  token::rightbrace()
                 }},
+#ifdef TODO
                {"a.b",
                 false,
                 {token::symbol_token("a"),
                  token::dot(),
                  token::symbol_token("b")
                 }},
+#endif
                {"a,b",
                 false,
                 {token::symbol_token("a"),
@ -431,6 +440,132 @@ namespace xo {
            }
        } /*TEST_CASE(tokenizer2)*/

+        namespace {
+            using tkz_error_type = xo::scm::tokenizer_error<char>;
+            using span_type = xo::scm::span<const char>;
+
+            struct testcase_error {            /* one tokenizer3 case: an input plus the error it should produce */
+                std::string input_;            /* text handed to the tokenizer */
+                tkz_error_type expect_error_;  /* error details the scan result is compared against */
+            };
+
+            testcase_error
+            make_testcase(const char * input, const char * src_function, const char * error_descr,
+                          size_t tk_start, size_t whitespace, size_t error_pos)
+            {
+                testcase_error retval;
+                retval.input_ = input;
+                retval.expect_error_ = tkz_error_type(src_function, error_descr,
+                                                      span_type::from_string(retval.input_),
+                                                      tk_start, whitespace, error_pos);
+                return retval;
+            }
+
+            std::vector<testcase_error>
+            s_testcase3_v = { /* inputs used by tokenizer3; each is expected to produce a tokenizer error.
+                               * make_testcase args: input, src_function, error_descr,
+                               *                     tk_start, whitespace, error_pos.
+                               * ruler comments: digits index the characters the tokenizer sees,
+                               * 'v' marks the expected error_pos.
+                               */
+                //             012345678
+                //             --------v
+                make_testcase("123.456ez",
+                              "assemble_token",
+                              "unexpected character in numeric constant",
+                              0, 0, 8),
+                //             01
+                //             -v
+                make_testcase("1-3",
+                              "assemble_token",
+                              "improperly placed sign indicator",
+                              0, 0, 1),
+                //             012
+                //             --v
+                make_testcase("1..2",
+                              "assemble_token",
+                              "duplicate decimal point in numeric literal",
+                              0, 0, 2),
+                //             0123456
+                //             ------v
+                make_testcase("1.23e4e",
+                              "assemble_token",
+                              "duplicate exponent marker in numeric literal",
+                              0, 0, 6),
+                // tokenizer sees string ["\"]
+                //              0 1 2 3
+                //              - - - v
+                make_testcase("\"\\\"",
+                              "assemble_token",
+                              "missing terminating '\"' to complete literal string",
+                              //"expect \\ to escape one of n|t|r|\"|\\ in string literal",
+                              0, 0, 3),
+                // tokenizer sees literal with embedded newline
+                //                        1         2         3
+                //              01234567890123456789012345678901 2
+                //              -------------------------------- v
+                make_testcase("\"everything was going fine until\n\"",
+                              "scan",
+                              "must use \\n or \\r to encode newline/cr in string literal",
+                              0, 0, 32),
+                // tokenizer sees string ["\]
+                //              0 1 2
+                //              - - v
+                make_testcase("\"\\",
+                              "assemble_token",
+                              "expecting key following escape character \\",
+                              0, 0, 2),
+                // tokenizer sees string ["\q"]
+                //              0 12
+                //              - -v
+                make_testcase("\"\\q\"",
+                              "assemble_token",
+                              "expecting one of n|r|\"|\\ following escape \\",
+                              0, 0, 2),
+                // single-character input: error expected at position 0
+                make_testcase("#",
+                              "assemble_token",
+                              "illegal input character",
+                              0, 0, 0),
+            };
+
+            TEST_CASE("tokenizer3", "[tokenizer]") {
+                /* testing error handling:
+                 * every input in s_testcase3_v must make tokenizer::scan2() report
+                 * an error whose source function, description and position fields
+                 * match the expected error stored with the testcase.
+                 */
+
+                using tokenizer = xo::scm::tokenizer<char>;
+
+                /* NOTE(review): forces debug logging on for every pass -- confirm this is intended */
+                constexpr bool c_force_debug = true;
+
+                for (std::size_t i_tc = 0, n_tc = s_testcase3_v.size(); i_tc < n_tc; ++i_tc) {
+                    const testcase_error & testcase = s_testcase3_v[i_tc];
+                    const tkz_error_type & expect = testcase.expect_error_;
+
+                    rehearser rh(0);
+
+                    for (auto _ : rh) {
+                        scope log(XO_DEBUG2(c_force_debug || rh.enable_debug(), "tokenizer3"));
+
+                        log && log(xtag("pass", _), xtag("ok(-)", rh.ok_flag_));
+                        log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));
+
+                        tokenizer tkz(c_force_debug || rh.enable_debug());
+
+                        auto in_span = tokenizer::span_type::from_string(testcase.input_);
+
+                        auto sr = tkz.scan2(in_span, true /*eof*/);
+
+                        REHEARSE(rh, sr.is_error());
+
+                        /* only inspect error details when an error was actually reported;
+                         * on the first pass REHEARSE() records a failure without aborting
+                         */
+                        if (sr.is_error()) {
+                            const auto actual_fn = sr.error().src_function();
+                            const auto actual_descr = sr.error().error_description();
+
+                            /* a missing string counts as a mismatch instead of being skipped */
+                            REHEARSE(rh, actual_fn != nullptr);
+                            if (actual_fn) {
+                                REHEARSE(rh, std::string(actual_fn) == std::string(expect.src_function()));
+                            }
+
+                            REHEARSE(rh, actual_descr != nullptr);
+                            if (actual_descr) {
+                                REHEARSE(rh, std::string(actual_descr) == std::string(expect.error_description()));
+                            }
+
+                            REHEARSE(rh, sr.error().whitespace() == expect.whitespace());
+                            REHEARSE(rh, sr.error().tk_start() == expect.tk_start());
+                            REHEARSE(rh, sr.error().error_pos() == expect.error_pos());
+                        }
+
+                        log && log(xtag("ok(+)", rh.ok_flag_));
+                    }
+                }
+            }
+        }
+
    } /*namespace ut*/
 } /*namespace xo*/

--- a/xo-unit/docs/scaled-unit-class.rst
+++ b/xo-unit/docs/scaled-unit-class.rst
@ -30,7 +30,7 @@ Context
 Introduction
 ------------

-.. code-block::cpp
+.. code-block:: cpp

    #include <xo/unit/scaled_unit.hpp>
				`@ -0,0 +1 @@`
				`add any static {.html, .js, ..} files for sphinx to pick up here`