From b9921d410873b5255eef1e8ac18c715b834aeeb8 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sat, 10 Jan 2026 12:39:09 -0500
Subject: [PATCH 01/33] + xo-tokenizer2 xo-reader2 xo-expression2
 xo-interpreter2

2nd gen schematika interpreter using fomo
---
 CMakeLists.txt                           |  37 +
 README.md                                |   1 -
 cmake/xo-bootstrap-macros.cmake          |  33 +
 cmake/xo_tokenizer2Config.cmake.in       |  12 +
 example/CMakeLists.txt                   |   1 +
 example/tokenrepl/CMakeLists.txt         |  15 +
 example/tokenrepl/tokenrepl.cpp          | 128 ++++
 include/xo/tokenizer2/.gitkeep           |   0
 include/xo/tokenizer2/TkInputState.hpp   | 230 +++++++
 include/xo/tokenizer2/Token.hpp          | 226 ++++++
 include/xo/tokenizer2/Tokenizer.hpp      | 167 +++++
 include/xo/tokenizer2/TokenizerError.hpp | 114 ++++
 include/xo/tokenizer2/buffer.hpp         | 328 +++++++++
 include/xo/tokenizer2/scan_result.hpp    |  81 +++
 include/xo/tokenizer2/span.hpp           | 291 ++++++++
 include/xo/tokenizer2/tokentype.hpp      | 192 ++++++
 src/tokenizer2/CMakeLists.txt            |  15 +
 src/tokenizer2/TkInputState.cpp          | 151 ++++
 src/tokenizer2/Token.cpp                 | 259 +++++++
 src/tokenizer2/Tokenizer.cpp             | 836 +++++++++++++++++++++++
 src/tokenizer2/TokenizerError.cpp        |  60 ++
 src/tokenizer2/scan_result.cpp           |  43 ++
 src/tokenizer2/tokentype.cpp             |  74 ++
 23 files changed, 3293 insertions(+), 1 deletion(-)
 create mode 100644 CMakeLists.txt
 delete mode 100644 README.md
 create mode 100644 cmake/xo-bootstrap-macros.cmake
 create mode 100644 cmake/xo_tokenizer2Config.cmake.in
 create mode 100644 example/CMakeLists.txt
 create mode 100644 example/tokenrepl/CMakeLists.txt
 create mode 100644 example/tokenrepl/tokenrepl.cpp
 create mode 100644 include/xo/tokenizer2/.gitkeep
 create mode 100644 include/xo/tokenizer2/TkInputState.hpp
 create mode 100644 include/xo/tokenizer2/Token.hpp
 create mode 100644 include/xo/tokenizer2/Tokenizer.hpp
 create mode 100644 include/xo/tokenizer2/TokenizerError.hpp
 create mode 100644 include/xo/tokenizer2/buffer.hpp
 create mode 100644 include/xo/tokenizer2/scan_result.hpp
 create mode 100644 include/xo/tokenizer2/span.hpp
 create mode 100644 include/xo/tokenizer2/tokentype.hpp
 create mode 100644 src/tokenizer2/CMakeLists.txt
 create mode 100644 src/tokenizer2/TkInputState.cpp
 create mode 100644 src/tokenizer2/Token.cpp
 create mode 100644 src/tokenizer2/Tokenizer.cpp
 create mode 100644 src/tokenizer2/TokenizerError.cpp
 create mode 100644 src/tokenizer2/scan_result.cpp
 create mode 100644 src/tokenizer2/tokentype.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..9eee1160
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,37 @@
+# xo-tokenizer2/CMakeLists.txt
+
+cmake_minimum_required(VERSION 3.10)
+
+project(xo_tokenizer2 VERSION 1.0)
+enable_language(CXX)
+
+include(GNUInstallDirs)
+include(cmake/xo-bootstrap-macros.cmake)   # sets CMAKE_MODULE_PATH if needed, then includes xo_macros/xo_cxx
+
+xo_cxx_toplevel_options3()
+
+# ----------------------------------------------------------------
+# c++ settings
+
+# one-time project-specific c++ flags; usually empty
+set(PROJECT_CXX_FLAGS "")
+add_definitions(${PROJECT_CXX_FLAGS})
+
+# ----------------------------------------------------------------
+# output targets
+
+add_subdirectory(src/tokenizer2)
+add_subdirectory(example)
+#add_subdirectory(utest)
+xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
+
+if (XO_ENABLE_EXAMPLES)   # xo_tokenizer2_repl is only created (in example/tokenrepl) under this flag
+    install(TARGETS xo_tokenizer2_repl DESTINATION bin/xo/example/tokenizer2)
+endif()
+
+# ----------------------------------------------------------------
+# docs targets depend on all the other library/utest targets
+#
+#add_subdirectory(docs)
+
+# end CMakeLists.txt
diff --git a/README.md b/README.md
deleted file mode 100644
index d64791cf..00000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# xo-tokenizer2
diff --git a/cmake/xo-bootstrap-macros.cmake b/cmake/xo-bootstrap-macros.cmake
new file mode 100644
index 00000000..2cf387e5
--- /dev/null
+++ b/cmake/xo-bootstrap-macros.cmake
@@ -0,0 +1,33 @@
+# ----------------------------------------------------------------
+# for example:
+#   $ PREFIX=/usr/local   # for example
+#   $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
+#
+# will get
+#   CMAKE_MODULE_PATH
+# from xo-cmake-config --cmake-module-path
+#
+# and expect .cmake macros in
+#   CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
+# ----------------------------------------------------------------
+
+find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)
+# find_program(.. REQUIRED) is only enforced from cmake 3.18 on; check the result explicitly
+if (NOT XO_CMAKE_CONFIG_EXECUTABLE)
+    message(FATAL_ERROR "could not find xo-cmake-config executable")
+endif()
+
+if (NOT XO_SUBMODULE_BUILD)
+    if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL "prefix"))
+        # default to typical install location for xo-project-macros (tool output stripped of trailing newline)
+        execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path OUTPUT_VARIABLE CMAKE_MODULE_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
+        message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
+    endif()
+endif()
+
+# needs to have been installed somewhere on CMAKE_MODULE_PATH,
+# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
+#
+include(xo_macros/xo_cxx)
+
+xo_cxx_bootstrap_message()
diff --git a/cmake/xo_tokenizer2Config.cmake.in b/cmake/xo_tokenizer2Config.cmake.in
new file mode 100644
index 00000000..b5c3cd5c
--- /dev/null
+++ b/cmake/xo_tokenizer2Config.cmake.in
@@ -0,0 +1,12 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+
+# note: find_dependency() calls here must be kept in sync
+#       with the xo_dependency() calls in CMakeLists.txt
+#       (no find_dependency() call is active at present)
+#
+#find_dependency(xo_flatstring)
+
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
+check_required_components("@PROJECT_NAME@")
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
new file mode 100644
index 00000000..e761ade5
--- /dev/null
+++ b/example/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(tokenrepl)
diff --git a/example/tokenrepl/CMakeLists.txt b/example/tokenrepl/CMakeLists.txt
new file mode 100644
index 00000000..e7a8c8f3
--- /dev/null
+++ b/example/tokenrepl/CMakeLists.txt
@@ -0,0 +1,15 @@
+# xo-tokenizer2/example/tokenrepl/CMakeLists.txt
+
+set(SELF_EXE xo_tokenizer2_repl)
+set(SELF_SRCS tokenrepl.cpp)
+# the example executable is only defined when XO_ENABLE_EXAMPLES is set
+if (XO_ENABLE_EXAMPLES)
+    xo_add_executable(${SELF_EXE} ${SELF_SRCS})
+    xo_self_dependency(${SELF_EXE} xo_tokenizer2)
+    xo_external_target_dependency(${SELF_EXE} replxx replxx::replxx)
+
+    find_package(Threads REQUIRED)   # replxx needs a threads library
+    target_link_libraries(${SELF_EXE} PUBLIC Threads::Threads)
+endif()
+
+# end CMakeLists.txt
diff --git a/example/tokenrepl/tokenrepl.cpp b/example/tokenrepl/tokenrepl.cpp
new file mode 100644
index 00000000..f97b9cd0
--- /dev/null
+++ b/example/tokenrepl/tokenrepl.cpp
@@ -0,0 +1,128 @@
+/** @file tokenrepl.cpp **/
+
+#include <xo/tokenizer2/Tokenizer.hpp>
+#include <xo/tokenizer2/Token.hpp>
+#include <xo/tokenizer2/tokentype.hpp>
+#include <xo/tokenizer2/span.hpp>
+#include <xo/indentlog/log_config.hpp>
+#include <replxx.hxx>
+#include <iostream>
+#include <unistd.h> // for isatty
+
+// Read one line of input through replxx (which presumably expects a tty).
+// Returns false at end of input; @p input is then left empty.
+bool replxx_getline(bool interactive,
+                    std::size_t parser_stack_size,
+                    replxx::Replxx & rx,
+                    std::string& input)
+{
+    using namespace std;
+
+    char const * prompt = "";
+
+    if (interactive) {
+        if (parser_stack_size <= 1)
+            prompt = "> ";
+        else
+            prompt = ". ";
+    }
+
+    const char * input_cstr = rx.input(prompt);
+
+    bool retval = (input_cstr != nullptr);
+
+    if (retval) {
+        input = input_cstr;
+
+        // record only lines actually read (and without the synthetic newline)
+        rx.history_add(input);
+
+        // we want tokenizer to see newline, it's syntax
+        input.push_back('\n');
+    } else {
+        // end of input: previous line must not be re-added to history
+        // or handed back to the caller with an extra newline
+        input.clear();
+    }
+
+    return retval;
+}
+
+#ifdef OBSOLETE  // plain-iostream predecessor of replxx_getline(), kept for reference
+bool repl_getline(bool interactive,
+                  std::istream & in,
+                  std::ostream & out,
+                  std::string & input)
+{
+    if (interactive) {
+        out << "> ";
+        std::flush(out);
+    }
+
+    return static_cast<bool>(std::getline(in, input));
+}
+#endif /*OBSOLETE*/
+/** tokenizer repl: read lines via replxx; print each token, or the first error, per line **/
+int
+main() {
+    using xo::scm::Tokenizer;
+    using xo::scm::span;
+    using xo::scm::operator<<;
+    using replxx::Replxx;
+
+    using namespace std;
+
+    using span_type = span<const char>;
+
+    xo::log_config::min_log_level = xo::log_level::severe;
+
+    bool interactive = isatty(STDIN_FILENO);
+
+    Replxx rx;
+    rx.set_max_history_size(1000);
+    rx.history_load("repl_history.txt");  // NOTE(review): history is loaded but never saved here -- confirm intended
+    // tokenizer debug logging is enabled only when min_log_level <= info
+    Tokenizer tkz(xo::log_config::min_log_level <= xo::log_level::info);
+
+    string input_str;
+
+    size_t line_no = 1;
+    // hard cap on the number of input lines processed (checked at end of loop body)
+    constexpr std::size_t c_maxlines = 25;
+
+    while (
+        //repl_getline(interactive, cin, cout, input_str)  // once upon a time
+        replxx_getline(interactive, 0 /*parser_stack_size*/, rx, input_str))
+    {
+        span_type input = span_type::from_string(input_str);
+
+        //cout << "input: " << input << endl;
+
+        // reminder: input may contain multiple tokens
+        while (!input.empty()) {
+            auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
+
+            if (tk.is_valid()) {
+                cout << tk << endl;
+            } else if (error.is_error()) {
+                cout << "tokenizer error: " << endl;
+                error.report(cout);
+
+                break;
+            }
+            // drop what the tokenizer consumed; remainder of line is scanned next
+            input = input.after_prefix(consumed);
+        }
+
+        /* here: input.empty() or error encountered */
+
+        ++line_no;
+
+        if (line_no > c_maxlines) {
+            cout << "always exit after " << c_maxlines << " lines of input" << endl;
+            break;
+        }
+    }
+}
+
+/** end tokenrepl.cpp */
diff --git a/include/xo/tokenizer2/.gitkeep b/include/xo/tokenizer2/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/include/xo/tokenizer2/TkInputState.hpp b/include/xo/tokenizer2/TkInputState.hpp
new file mode 100644
index 00000000..531585a1
--- /dev/null
+++ b/include/xo/tokenizer2/TkInputState.hpp
@@ -0,0 +1,230 @@
+/* @file TkInputState.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "span.hpp"
+
+namespace xo {
+    namespace scm {
+        /** enum to report outcome of @ref TkInputState::capture_current_line **/
+        enum class input_error {
+            /** normal return, input line successfully identified and captured **/
+            ok = 0,
+            /** incomplete input; should not have been submitted
+             *  to @ref TkInputState::capture_current_line.
+             *  note: submit last line of input with eof_flag=true
+             **/
+            incomplete,
+            N  /* comes last; equals the number of enumerators above (presumably a count sentinel) */
+        };
+
+        /** @class TkInputState
+         *  @brief Track detailed input position for use in error messages
+         *
+         *  input characters fall into two categories:
+         *  - consumed: memory can be reclaimed/recycled
+         *  - buffered: memory will be retained unaltered until consumed
+         *
+         *  remarks:
+         *  - always in one of two states:
+         *    - empty
+         *    - contains exactly one line of input
+         *  - also record current input position.
+         *    Use this for example to identify where tokenizer rejected input.
+         *  - .current_pos advances by one token
+         *
+         *  - buffered characters always form a single contiguous range.
+         *  - TkInputState does not own any storage; storage is owned elsewhere
+         *
+         *  @text
+         *
+         *    <------------------.current_line------------------>
+         *                                   >  <-- .whitespace
+         *    cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx
+         *    ^                                 ^                ^
+         *    .current_line.lo                  |                .current_line.hi
+         *                           .current_pos
+         *
+         *    <----prev_line----> <----current_line---->
+         *                                   >  <--whitespace
+         *    ppppppppppppppppppp cccccccccccc__TTTTTTTT
+         *    ^
+         *
+         *  @endtext
+         **/
+        class TkInputState {
+        public:
+            /** @defgroup input-state-type-traits input-state type traits **/
+            ///@{
+
+            using CharT = char;
+
+            /** type representing a contiguous span of tokenizer input characters **/
+            using span_type = span<const CharT>;
+
+            ///@}
+
+        public:
+            /** @defgroup input-state-ctors input_state constructors **/
+            ///@{
+
+            TkInputState() = default;
+            explicit TkInputState(bool debug_flag) : debug_flag_{debug_flag} {}
+            /** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
+             *  Introduced for unit tests, not used in tokenizer.
+             **/
+            explicit TkInputState(const span<const CharT>& current_line,
+                                  size_t current_pos,
+                                  size_t whitespace) : current_line_{current_line},
+                                                       current_pos_{current_pos},
+                                                       whitespace_{whitespace} {}
+
+            ///@}
+
+            /** @defgroup input-state-static-methods input_state static methods **/
+            ///@{
+
+            /** recognize the newline character '\n' **/
+            static bool is_newline(CharT ch);
+            /** identifies whitespace chars.
+             *  These are chars that do not belong to any token.
+             *  They are not permitted to appear within
+             *  a symbol or string token.
+             *  Appearance of a whitespace char forces completion of
+             *  preceding token.
+             **/
+            static bool is_whitespace(CharT ch);
+
+            ///@}
+
+            /** @defgroup input-state-access-methods **/
+            ///@{
+
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const span_type & current_line() const { return current_line_; }
+#pragma GCC diagnostic pop
+            size_t tk_start() const { return tk_start_; }
+            size_t current_pos() const { return current_pos_; }
+            size_t whitespace() const { return whitespace_; }
+            bool debug_flag() const { return debug_flag_; }
+
+            ///@}
+
+            /** @defgroup input-state-general-methods **/
+            ///@{
+
+            /** Input state less @p n chars.
+             *  Use to recover input state before a complete but error-triggering token
+             **/
+            TkInputState rewind(std::size_t n) const;
+
+            /** Capture prefix of @p input up to first newline.
+             *  Set read position to start of line.
+             *
+             *  Alters:
+             *    .current_line
+             *    .current_pos
+             *
+             * Return pair comprising error code and input span representing first line
+             * (including trailing newline) from @p input.
+             **/
+            std::pair<input_error, span_type> capture_current_line(const span_type & input,
+                                                                   bool eof_flag);
+
+            /** atomically return current line while discarding it from input state
+             *
+             *  Alters
+             *    .current_line
+             *    .current_pos
+             *    .whitespace
+             **/
+            span_type consume_current_line();
+
+            /** Reset input state for start of next line.
+             *  Expression parser may use this to discard remainder of input line
+             *  after a parsing error.
+             *
+             * Alters:
+             *   .current_line
+             *   .current_pos
+             *   .whitespace
+             **/
+            void discard_current_line();
+
+            /** Advance input position by @p z
+             *
+             *  Alters:
+             *   .current_pos
+             **/
+            void advance(size_t z);
+
+            /** Advance .current_pos to pos.
+             *  Require: pos in @ref current_line_
+             **/
+            void advance_until(const CharT * pos);
+
+            /** Skip prefix of input, starting at current read position,
+             *  comprising only whitespace.
+             *
+             *  Presume input position is at end of token;
+             *  on return @ref whitespace_ counts number of whitespace characters
+             *  skipped.
+             *
+             *  Return pointer to first non-whitespace character after @ref current_pos_
+             *  or @ref current_line_.hi if reached end of buffered line.
+             *
+             *  Alters:
+             *    .whitespace
+             **/
+            const CharT * skip_leading_whitespace();
+
+            ///@}
+
+        private:
+            /** @defgroup input-state-instance-vars input_state instance variables **/
+            ///@{
+
+            /** remember current input line.  Used only to report errors **/
+            span<const CharT> current_line_ = span<const CharT>();
+            /** start of last token within @ref current_line_ **/
+            size_t tk_start_ = 0;
+            /** input position within @ref current_line_ **/
+            size_t current_pos_ = 0;
+            /** number of whitespace chars since end of preceding token,
+             *  or last newline, whichever is less
+             **/
+            size_t whitespace_ = 0;
+
+            /** true to log input activity */
+            bool debug_flag_ = false;
+
+            ///@}
+        }; /*TkInputState*/
+        /** write tk_start, current_pos, current line text and whitespace count of @p x to @p os **/
+        inline std::ostream &
+        operator<<(std::ostream & os,
+                   const TkInputState & x)
+        {
+            using xo::print::unq;
+
+            os << "<input_state"
+               << xtag("tk", x.tk_start())
+               << xtag("pos", x.current_pos())
+               << xtag("line",
+                       unq(std::string_view(x.current_line().lo(),
+                                            x.current_line().hi())))
+               << xtag("whitespace", x.whitespace())
+            << ">";
+
+            return os;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TkInputState.hpp */
diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
new file mode 100644
index 00000000..0994e3b8
--- /dev/null
+++ b/include/xo/tokenizer2/Token.hpp
@@ -0,0 +1,226 @@
+/* file Token.hpp
+ *
+ * author: Roland Conybeare, Jul 2024
+ */
+
+#pragma once
+
+#include "tokentype.hpp"
+#include "xo/indentlog/print/tag.hpp"
+#include <stdexcept>
+#include <ostream>
+#include <string>
+#include <cstdint>
+
+namespace xo {
+    namespace scm {
+        namespace detail {
+            /* compute a * b^p,  p >= 0, by square-and-multiply (for p <= 0 returns a unchanged) */
+            constexpr double
+            pow_aux(double a, double b, int p) {
+                while (p > 0) {
+                    if (p % 2 == 1) {
+                        /* a * b^p = a * b^(2q + 1) = (a * b) * b^(2q) */
+                        a *= b;
+                        p -= 1;
+                    } else {
+                        /* a * b^p = a * b^(2q) = a * (b^2)^q */
+                        b = b * b;
+                        p /= 2;
+                    }
+                }
+
+                /* a * b^0 = a */
+                return a;
+            }
+            /* compute 10^p; negative p is handled as 1 / 10^(-p) */
+            constexpr double
+            pow10(int p) {
+                if (p >= 0)
+                    return pow_aux(1.0, 10.0, p);
+                else
+                    return 1.0 / pow_aux(1.0, 10.0, -p);
+            }
+        }
+
+        /** @class Token
+         *  @brief Represent a Schematika lexical token
+         **/
+        class Token {
+        public:
+            /** @defgroup token-ctors token constructors **/
+            ///@{
+
+            /** default ctor creates token with type @c tk_invalid **/
+            Token() = default;
+            /** create token with type @c tk_type and input text @c text **/
+            Token(tokentype tk_type, const std::string & text = "")
+                : tk_type_{tk_type}, text_{text} {}
+
+            /** create invalid token (same as null ctor, but explicit) **/
+            static Token invalid() { return Token(); }
+            /** Create token representing a boolean literal from text @p txt
+             *  @p txt must be @c true or @c false
+             **/
+            static Token bool_token(const std::string & txt) {
+                return Token(tokentype::tk_bool, txt);
+            }
+            /** Create token representing 64-bit signed integer literal parsed from decimal @p txt.
+             *  The string @p txt must be a decimal integer literal, since @ref i64_value re-parses @p txt.
+             **/
+            static Token i64_token(const std::string & txt) {
+                return Token(tokentype::tk_i64, txt);
+            }
+            /** create token representing 64-bit floating-point literal parsed from decimal @p txt
+             *  The string @p txt must be a decimal floating-point literal, since @ref f64_value re-parses @p txt.
+             **/
+            static Token f64_token(const std::string & txt) {
+                return Token(tokentype::tk_f64, txt);
+            }
+            /** create token representing literal string parsed from @p txt **/
+            static Token string_token(const std::string & txt) {
+                return Token(tokentype::tk_string, txt);
+            }
+            /** create token representing a symbol parsed from @p txt.
+             *  Note that not all strings are valid symbol names.
+             **/
+            static Token symbol_token(const std::string & txt) {
+                return Token(tokentype::tk_symbol, txt);
+            }
+            /** token representing left angle bracket @c "<" **/
+            static Token leftangle() { return Token(tokentype::tk_leftangle); }
+            /** token representing right angle bracket @c ">" **/
+            static Token rightangle() { return Token(tokentype::tk_rightangle); }
+            /** token representing left parenthesis @c "(" **/
+            static Token leftparen() { return Token(tokentype::tk_leftparen); }
+            /** Token representing right parenthesis @c ")" **/
+            static Token rightparen() { return Token(tokentype::tk_rightparen); }
+            /** token representing left bracket @c "[" **/
+            static Token leftbracket() { return Token(tokentype::tk_leftbracket); }
+            /** token representing right bracket @c "]" **/
+            static Token rightbracket() { return Token(tokentype::tk_rightbracket); }
+            /** token representing left brace @c "{" **/
+            static Token leftbrace() { return Token(tokentype::tk_leftbrace); }
+            /** token representing right brace @c "}" **/
+            static Token rightbrace() { return Token(tokentype::tk_rightbrace); }
+            /** token representing period @c "." **/
+            static Token dot() { return Token(tokentype::tk_dot); }
+            /** token representing comma @c "," **/
+            static Token comma() { return Token(tokentype::tk_comma); }
+            /** token representing colon @c ":" **/
+            static Token colon() { return Token(tokentype::tk_colon); }
+            /** token representing double-colon @c "::" **/
+            static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
+            /** token representing semicolon @c ";" **/
+            static Token semicolon() { return Token(tokentype::tk_semicolon); }
+            /** token representing single-assignment @c "=" **/
+            static Token singleassign() { return Token(tokentype::tk_singleassign); }
+            /** token representing unrestricted assignment @c ":=" **/
+            static Token assign_token() { return Token(tokentype::tk_assign); }
+            /** token representing indirection @c "->" **/
+            static Token yields() { return Token(tokentype::tk_yields); }
+
+            /** token for @c "+" **/
+            static Token plus_token() { return Token(tokentype::tk_plus); }
+            /** token for @c "-" **/
+            static Token minus_token() { return Token(tokentype::tk_minus); }
+            /** token for @c "*" **/
+            static Token star_token() { return Token(tokentype::tk_star); }
+            /** token for @c "/" **/
+            static Token slash_token() { return Token(tokentype::tk_slash); }
+
+            /** token representing keyword @c type **/
+            static Token type() { return Token(tokentype::tk_type); }
+            /** token representing keyword @c def **/
+            static Token def() { return Token(tokentype::tk_def); }
+            /** token representing keyword @c lambda **/
+            static Token lambda() { return Token(tokentype::tk_lambda); }
+            /** token representing keyword @c if **/
+            static Token if_token() { return Token(tokentype::tk_if); }
+            /** token representing keyword @c else **/
+            static Token else_token() { return Token(tokentype::tk_else); }
+            /** token representing keyword @c let **/
+            static Token let() { return Token(tokentype::tk_let); }
+            /** token representing keyword @c in **/
+            static Token in() { return Token(tokentype::tk_in); }
+            /** token representing keyword @c end **/
+            static Token end() { return Token(tokentype::tk_end); }
+
+            ///@}
+
+            /** @defgroup token-access-methods **/
+            ///@{
+
+            tokentype tk_type() const { return tk_type_; }
+            const std::string & text() const { return text_; }
+
+            ///@}
+
+            /** @defgroup token-general-methods **/
+            ///@{
+
+            /** true if token understood to represent valid input
+             *  i.e. any token type except @c tk_invalid
+             **/
+            bool is_valid() const { return tk_type_ != tokentype::tk_invalid; }
+            /** true for sentinel token with type tk_invalid **/
+            bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; }
+
+            /** true for tokens with variable text.  false for those with fixed textual representation **/
+            bool has_variable_text() const { return (tk_type_ == tokentype::tk_i64
+                                                     || tk_type_ == tokentype::tk_f64
+                                                     || tk_type_ == tokentype::tk_string
+                                                     || tk_type_ == tokentype::tk_symbol); }
+
+            /** expect input matching @c true or @c false **/
+            bool bool_value() const;
+
+            /** expect input matching @c [+|-][0-9][0-9]* **/
+            std::int64_t i64_value() const;
+
+            /** expect input matching @c [+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]* **/
+            double f64_value() const;
+
+            /** print human-readable token representation on stream @p os **/
+            void print(std::ostream & os) const;
+
+            ///@}
+
+        private:
+            /** @defgroup token-instance-vars **/
+            ///@{
+
+            /** category for this token **/
+            tokentype tk_type_ = tokentype::tk_invalid;
+
+            /** characters comprising this token.
+             *  only provided for certain token types:
+             *
+             *    tk_i64
+             *    tk_f64
+             *    tk_string
+             *    tk_symbol
+             **/
+            std::string text_;
+
+            ///@}
+        };
+        /** stream inserter for Token; delegates to Token::print() **/
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const Token & tk)
+        {
+            tk.print(os);
+            return os;
+        }
+    } /*namespace scm*/
+
+#ifndef ppdetail_atomic
+    namespace print {
+        PPDETAIL_ATOMIC(xo::scm::Token);  // Token is a plain class here, not the old token<CharT> template
+    }
+#endif
+
+} /*namespace xo*/
+
+/* end Token.hpp */
diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
new file mode 100644
index 00000000..99005fee
--- /dev/null
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -0,0 +1,167 @@
+/* file Tokenizer.hpp
+ *
+ * author: Roland Conybeare, Jul 2024
+ */
+
+#pragma once
+
+#include "Token.hpp"
+#include "TkInputState.hpp"
+#include "span.hpp"
+#include "scan_result.hpp"
+#include "xo/indentlog/scope.hpp"
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <cassert>
+
+namespace xo {
+    namespace scm {
+        /** @class Tokenizer
+         *  @brief Parse a Schematika character stream into lexical tokens
+         *
+         *  Use:
+         *
+         *  @code
+         *    // see xo-tokenizer2/example/tokenrepl/tokenrepl.cpp
+         *    // for exact working code
+         *
+         *    using tokenizer_type = Tokenizer;
+         *    using span_type = tokenizer_type::span_type;
+         *
+         *    tokenizer_type tkz;
+         *    span_type input = ...;
+         *
+         *    while (!input.empty()) {
+         *        auto [tk, consumed, error] = tkz.scan(input, false /*eof_flag*/);
+         *
+         *        if (tk.is_valid()) {
+         *            // do something with tk
+         *        } else if (error.is_error()) {
+         *            error.report(cout);
+         *            break;
+         *        }
+         *
+         *        input = input.after_prefix(consumed);
+         *    }
+         *
+         *    // at end of input: call tkz.scan(input, true /*eof_flag*/)
+         *    // for the final span.  NOTE(review): presumably this completes
+         *    // any token still held as stored prefix -- confirm in Tokenizer.cpp
+         *    //
+         *    // (this class declares no separate notify_eof() method)
+         *
+         *  @endcode
+         *
+         * See tokentype.hpp for token types
+         **/
+        class Tokenizer {
+        public:
+            using CharT = char;
+            using token_type = Token;
+            using error_type = TokenizerError;
+            using span_type = span<const CharT>;
+            using input_state_type = TkInputState;
+            using result_type = scan_result;
+
+        public:
+            /** @defgroup tokenizer-ctors tokenizer constructors **/
+            ///@{
+
+            Tokenizer(bool debug_flag = false);
+
+            ///@}
+
+            /** @defgroup tokenizer-access-methods tokenizer access methods **/
+            ///@{
+
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const TkInputState & input_state() const { return input_state_; }
+#pragma GCC diagnostic pop
+
+            ///@}
+
+            /** @defgroup tokenizer-general-methods tokenizer methods **/
+            ///@{
+
+            /** identifies punctuation chars.
+             *  These are chars that are not permitted to appear within
+             *  a symbol token.  Instead they force completion of
+             *  a preceding token,  and start a new token with themselves
+             **/
+            static bool is_1char_punctuation(CharT ch);
+
+            /** more-relaxed version of is_1char_punctuation.
+             *  Chars that are not permitted to appear within a symbol token,
+             *  but may form token combined with next character
+             **/
+            static bool is_2char_punctuation(CharT ch);
+
+            /** assemble token from text @p token_text.
+             *  @p initial_whitespace   Amount of whitespace input being consumed from input.
+             *  @p token_text subset of input_line representing a single token.
+             *  @p p_input_state input state containing input_line.  On exit current line cleared
+             *                   if error
+             *
+             *  retval.consumed will represent some possibly-empty prefix of @p input
+             **/
+            static scan_result assemble_token(std::size_t initial_whitespace,
+                                              const span_type & token_text,
+                                              TkInputState * p_input_state);
+
+            /** degenerate version of assemble_token() on reaching end-of-file **/
+            static scan_result assemble_final_token(const span_type & token_text,
+                                                    TkInputState * p_input_state);
+
+            /** true if tokenizer contains stored prefix of
+             *  possibly-incomplete token
+             **/
+            bool has_prefix() const { return !prefix_.empty(); }
+
+            /** scan for next input token,  given @p input.
+             *  Note:
+             *  - tokenizer can consume input (e.g. whitespace)
+             *    without completing a token
+             *  - input will remember the extent of the last line of input
+             *    for which parsing has begun, but not completed.
+             *    It's required that at least that portion of the input span
+             *    remain valid across scan() calls
+             *
+             *  @return {parsed token, consumed span, error}
+             **/
+            scan_result scan(const span_type & input,
+                             bool eof_flag);
+
+            /** discard current line after error.  Just cleans up error-reporting state **/
+            void discard_current_line();
+
+            ///@}
+
+        private:
+            /** @defgroup tokenizer-instance-vars tokenizer instance variables **/
+            ///@{
+
+            /** track input state (line#,pos,..) for error messages.
+             *  There's an ordering problem here:
+             *  1. input_state_.skip_leading_whitespace() advances
+             *     current line automagically when it sees \n
+             *  2. need to capture value of @ref input_state_ _before_ newline
+             *  3. but need newline to end token
+             *  Also recall input_state_type needed for reporting errors.
+             **/
+            input_state_type input_state_;
+            /** Accumulate partial token here.
+             *  This will happen if input sent to @ref Tokenizer::scan
+             *  ends without whitespace such that last available token's
+             *  extent is not determined
+             **/
+            std::string prefix_;
+
+            ///@}
+        }; /*Tokenizer*/
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end Tokenizer.hpp */
diff --git a/include/xo/tokenizer2/TokenizerError.hpp b/include/xo/tokenizer2/TokenizerError.hpp
new file mode 100644
index 00000000..a7fab3c2
--- /dev/null
+++ b/include/xo/tokenizer2/TokenizerError.hpp
@@ -0,0 +1,114 @@
+/* file TokenizerError.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "TkInputState.hpp"
+#include "tokentype.hpp"
+#include "span.hpp"
+#include <iomanip>
+
+namespace xo {
+    namespace scm {
+        /** @class TokenizerError
+         *  @brief represent a lexing error, with context
+         *
+         *  Characters are represented as @c char (see the @c CharT alias below)
+         **/
+        class TokenizerError {
+        public:
+            using CharT = char;
+            using span_type = span<const CharT>;
+
+        public:
+            /** @defgroup tokenizer-error-ctors **/
+            ///@{
+
+            /** Default ctor represents a not-an-error sentinel object **/
+            TokenizerError() = default;
+            /** Capture parsing error context: @p src_function (reporting site),
+             *  @p error_description (message), @p input_state (copied into this error),
+             *  @p error_pos (error location relative to token start)
+             **/
+            TokenizerError(const char * src_function,
+                           std::string error_description,
+                           const TkInputState & input_state,
+                           size_t error_pos)
+                : src_function_{src_function},
+                  error_description_{std::move(error_description)},
+                  input_state_{input_state},
+                  error_pos_{error_pos}
+                {
+                    scope log(XO_DEBUG(input_state.debug_flag()));
+
+                    log && log(xtag("input_state.current_pos", input_state.current_pos()),
+                               xtag("error_pos", error_pos));
+                }
+            ///@}
+
+            /** @defgroup tokenizer-error-access-methods **/
+            ///@{
+
+            const char * src_function() const { return src_function_; }
+            const std::string & error_description() const { return error_description_; }
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const TkInputState & input_state() const { return input_state_; }
+#pragma GCC diagnostic pop
+            size_t tk_start() const { return input_state_.current_pos(); }
+            size_t whitespace() const { return input_state_.whitespace(); }
+            size_t error_pos() const { return error_pos_; }
+
+            ///@}
+
+            /** @defgroup tokenizer-error-general-methods **/
+            ///@{
+
+            /** true iff the error description is non-empty, i.e. except for a sentinel error object **/
+            bool is_error() const { return !error_description_.empty(); }
+            /** true iff the error description is empty, i.e. only for an object in sentinel state **/
+            bool is_not_an_error() const { return error_description_.empty(); }
+
+            /** Print representation to stream @p os. Intended for tokenizer diagnostics.
+             *  For Schematika errors prefer @ref report
+             **/
+            void print(std::ostream & os) const;
+
+            /** Print human-oriented error report on @p os. **/
+            void report(std::ostream & os) const;
+
+            ///@}
+
+        private:
+            /** @defgroup tokenizer-error-vars **/
+            ///@{
+
+            /** source location (in tokenizer) at which error identified **/
+            char const * src_function_ = nullptr;
+            /** error description; empty exactly when this object is the not-an-error sentinel **/
+            std::string error_description_;
+            /** input state associated with this error.
+             *  Sufficient to precisely locate it with context.
+             **/
+            TkInputState input_state_;
+            /** position of error, relative to token start (see @ref tk_start) **/
+            size_t error_pos_ = 0;
+
+            ///@}
+        }; /*TokenizerError*/
+
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const TokenizerError & tkerr)
+        {
+            tkerr.print(os);
+            return os;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TokenizerError.hpp */
diff --git a/include/xo/tokenizer2/buffer.hpp b/include/xo/tokenizer2/buffer.hpp
new file mode 100644
index 00000000..7b19316b
--- /dev/null
+++ b/include/xo/tokenizer2/buffer.hpp
@@ -0,0 +1,328 @@
+/** @file buffer.hpp **/
+
+#pragma once
+
+#include "span.hpp"
+#include <utility>
+#include <cstdint>
+#include <cassert>
+#include <new>
+
+namespace xo {
+    namespace scm {
+        /**
+         * @class buffer buffer.hpp
+         *
+         * @brief Container for a (possibly owned) FIFO queue of chars
+         *
+         * @tparam CharT.  buffer element type.
+         *
+         * @code
+         *  .buf
+         *
+         *    +------------------------------------------+
+         *    |  |  ...  |  | X|  ... | X|  |    ...  |  |
+         *    +------------------------------------------+
+         *     ^             ^            ^               ^
+         *     0             .lo          .hi             .buf_z
+         *
+         *                   <-contents-><----avail----->
+         * @endcode
+         *
+         * Buffer does not support wrapped content:
+         * content that has not been consumed always occupies contiguous memory.
+         *
+         * Example:
+         * @code
+         * // 1.
+         *   buffer<char> buf(64*1024);
+         *   buf.empty() -> true
+         *   buf.buf_z() -> 65536
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 0
+         *   buf.contents() -> empty span
+         *   buf.avail() -> span entire buffer memory
+         *
+         *   // write to (a prefix of) buf.avail()
+         *   ::strncpy(buf.buf(), "hello, world\n", 13);
+         *   buf.produce(span_type(buf.buf(), buf.buf() + 13));
+         *
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 13
+         *   buf.contents() -> "hello, world\n";
+         *
+         *
+         *   // examine stored content (does not change buffer state)
+         *   auto span = buf.contents();
+         *   cerr << string_view(span.lo(), span.hi());  // "hello, world\n"
+         *
+         *   // consume (a prefix of) stored content
+         *   buf.consume(span.prefix(7));
+         *
+         *   buf.lo_pos() -> 7
+         *   buf.hi_pos() -> 13
+         *   buf.contents() -> "world\n"
+         *
+         *   // consuming all remaining content resets to original state
+         *   buf.consume(buf.contents());
+         *
+         *   buf.empty() -> true
+         *   buf.hi_pos() -> 0     // not 13!
+         *
+         * // 2.
+         *   buffer<char> buf;
+         *   buf.empty() -> true
+         *   buf.buf_z() -> 0
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 0
+         *   buf.contents() -> empty span
+         *   buf.avail() -> empty span
+         *
+         *   // allocate memory separately from ctor
+         *   buf.alloc(64*1024);
+         * @endcode
+         **/
+        template <typename CharT>
+        class buffer {
+        public:
+            /** @brief typealias for span of CharT **/
+            using span_type = span<CharT>;
+            /** @brief typealias for buffer size (counts CharT's, not bytes) **/
+            using size_type = std::uint64_t;
+
+        public:
+            /** @brief create empty buffer.
+
+                Does not allocate any storage;  @see alloc
+            **/
+            buffer() = default;
+            /** @brief create empty buffer,  and possibly allocate storage.
+
+                @param buf_z    Buffer size.  allocate storage (owned by this buffer) if >0.
+                @param align_z  Align to this value,  e.g. 8 to align storage on an 8-byte boundary
+            **/
+            buffer(size_type buf_z,
+                   size_type align_z = sizeof(char))
+                : is_owner_{true},
+                  align_z_{align_z},
+                  buf_{alloc_storage(buf_z, align_z)},
+                  buf_z_{buf_z},
+                  lo_pos_{0},
+                  hi_pos_{0}
+                {}
+            /** @brief buffer is not copyable **/
+            buffer(buffer const & x) = delete;
+            /** @brief destructor.  Release storage if owned **/
+            ~buffer() { this->reset(); }
+
+            /** @name Access methods **/
+            ///@{
+
+            /** @brief start of buffer memory **/
+            CharT * buf() const { return buf_; }
+            /** @brief buffer size (number of characters) **/
+            size_type buf_z() const { return buf_z_; }
+            /** @brief current start position within buffer **/
+            size_type lo_pos() const { return lo_pos_; }
+            /** @brief current end position within buffer **/
+            size_type hi_pos() const { return hi_pos_; }
+
+            ///@}
+
+            /** @brief readonly access to a single buffer element.
+
+                Relative to start of buffer (ignores current consume position)
+            **/
+            CharT const & operator[](size_type i) const { return buf_[i]; }
+
+            /** @brief return span for current buffer contents **/
+            span_type contents() const { return span_type(buf_ + lo_pos_,
+                                                          buf_ + hi_pos_); }
+            /** @brief returns span for writable buffer memory (unused suffix following produce position) **/
+            span_type avail() const { return span_type(buf_ + hi_pos_,
+                                                       buf_ + buf_z_); }
+
+            /** @brief @c true iff buffer is empty **/
+            bool empty() const { return lo_pos_ == hi_pos_; }
+
+            /**
+               @brief update buffer produce position, after (independently) writing contents of span to it
+
+               @pre left endpoint of @p span equals buffer produce position (@c .hi_pos)
+               @pre right endpoint of @p span within bounds of buffer memory range
+               @post right endpoint of @p span equals buffer produce position.
+            **/
+            void produce(span_type const & span) {
+                assert(span.lo() == buf_ + hi_pos_);
+                assert(span.size() <= buf_z_ - hi_pos_);
+
+                hi_pos_ += span.size();
+            }
+
+            /**
+               @brief update buffer consume position,  when done with contents of span
+
+               @pre left endpoint of @p span equals buffer consume position (@c .lo_pos)
+               @pre right endpoint of @p span within bounds of buffer memory range
+               @post Either
+               buffer is empty, with @c .lo_pos = @c .hi_pos = @c 0.
+               buffer is non-empty, right endpoint of @p span equals new buffer consume position.
+            **/
+            void consume(span_type const & span) {
+                if (span.size()) {
+                    assert(span.lo() == buf_ + lo_pos_);
+
+                    lo_pos_ += span.size();
+                } else {
+                    /* since .consume() that arrives at empty contents also resets .lo_pos .hi_pos,
+                     * we don't want to blow up when called with an empty span -- argument
+                     * may represent some pre-reset location in buffer
+                     */
+                }
+
+                if (lo_pos_ == hi_pos_) {
+                    lo_pos_ = 0;
+                    hi_pos_ = 0;
+                }
+            }
+
+            /**
+               @brief allocate buffer with desired amount of memory
+
+               @param buf_z     desired buffer size
+               @param align_z   alignment;  buffer memory will be aligned on this byte-boundary.
+            **/
+            void alloc(size_type buf_z, size_type align_z = sizeof(char)) {
+                /* properly reset (+ discard) any existing state */
+                this->reset();
+
+                is_owner_ = true;
+                align_z_ = align_z;
+                buf_ = alloc_storage(buf_z, align_z);
+                buf_z_ = buf_z;
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief attach buffer to (unowned)  range of @p buf_z bytes starting at @p buf[0]
+
+               Buffer is not responsible for managing storage.
+
+               @post
+               1. buffer is empty
+               @post
+               2. buffer read position = buffer write position = 0
+            **/
+            void setbuf(CharT * buf, size_type buf_z) {
+                /* properly reset (+ discard) any existing state */
+                this->reset();
+
+                is_owner_ = false;
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+                buf_ = buf;
+                buf_z_ = buf_z;
+            }
+
+            /**
+               @brief revert buffer to empty state and possibly zero it
+
+               @param zero_buffer_flag   Zero buffer contents iff this is true
+
+               @post
+               1. buffer is empty
+               @post
+               2. buffer read position = buffer write position = 0
+            **/
+            void clear2empty(bool zero_buffer_flag) {
+                if (buf_ && zero_buffer_flag)
+                    explicit_bzero(buf_, buf_z_ * sizeof(CharT));
+
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief swap representation (storage, ownership, alignment, positions) with another buffer instance.
+            **/
+            void swap (buffer & x) {
+                std::swap(is_owner_, x.is_owner_);
+                std::swap(align_z_, x.align_z_);
+                std::swap(buf_, x.buf_);
+                std::swap(buf_z_, x.buf_z_);
+                std::swap(lo_pos_, x.lo_pos_);
+                std::swap(hi_pos_, x.hi_pos_);
+            }
+
+            /**
+               @brief reset buffer to an empty state and recover owned storage.
+               Owned storage is released with the aligned @c operator @c delete[] that matches its allocation.
+            **/
+            void reset() {
+                if (is_owner_ && buf_)
+                    ::operator delete[](buf_, std::align_val_t(align_z_));
+
+                is_owner_ = false;
+                buf_ = nullptr;
+                buf_z_ = 0;
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief move-assignment operator.  Releases storage owned by this buffer, then adopts state of @p x.
+               @param x   right-hand-side to move from.
+               @post @p x is in a valid, empty, non-owning state (self-assignment is a no-op)
+            **/
+            buffer & operator= (buffer && x) {
+                if (this != &x) {
+                    /* reset() leaves this buffer empty + non-owning; swap() then hands that state to x */
+                    this->reset();
+                    this->swap(x);
+                }
+
+                return *this;
+            }
+
+            /** @brief buffer is not assignable */
+            buffer & operator= (buffer & x) = delete;
+
+        private:
+            /** @brief raw (unconstructed) storage for @p buf_z char-like elements aligned on @p align_z; nullptr if 0 **/
+            static CharT * alloc_storage(size_type buf_z, size_type align_z) {
+                if (buf_z == 0)
+                    return nullptr;
+                return static_cast<CharT *>(::operator new[](buf_z * sizeof(CharT), std::align_val_t(align_z)));
+            }
+
+            /** @brief true iff buffer is responsible for freeing storage at @c buf_ **/
+            bool is_owner_ = false;
+            /** @brief alignment used to allocate owned storage;  must be passed back when releasing it **/
+            size_type align_z_ = sizeof(char);
+            /** @brief buffer contents.  buffer memory comprises @c buf_[0] up to (not including) @c buf_[buf_z_] **/
+            CharT * buf_ = nullptr;
+            /** @brief buffer size (in units of CharT) **/
+            size_type buf_z_ = 0;
+
+            /** @brief buffer read (consume) position
+                @invariant 0 <= lo_pos_ <= hi_pos_ <= buf_z_
+            **/
+            size_type lo_pos_ = 0;
+            /** @brief buffer write (produce) position
+                @invariant 0 <= lo_pos_ <= hi_pos_ <= buf_z_
+            **/
+            size_type hi_pos_ = 0;
+        };
+
+        /** @brief Non-member @c swap overload for @c buffer<CharT>;  forwards to @c buffer::swap **/
+        template <typename CharT>
+        inline void
+        swap(buffer<CharT> & lhs,
+             buffer<CharT> & rhs) {
+            lhs.swap(rhs);
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end buffer.hpp */
diff --git a/include/xo/tokenizer2/scan_result.hpp b/include/xo/tokenizer2/scan_result.hpp
new file mode 100644
index 00000000..971e4b93
--- /dev/null
+++ b/include/xo/tokenizer2/scan_result.hpp
@@ -0,0 +1,81 @@
+/* file scan_result.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "Token.hpp"
+#include "TokenizerError.hpp"
+#include "TkInputState.hpp"
+
+namespace xo {
+    namespace scm {
+        /** @class scan_result
+         *  @brief Represent result of parsing one input token.
+         *
+         * @code
+         *  Possible outcomes fall into several categories
+         *  (with T: @c token_.is_valid(), E: @c error_.is_error())
+         *
+         *  | T     | E     | description                         |
+         *  |-------+-------+-------------------------------------|
+         *  | false | false | end of input, including end of line |
+         *  | true  | false | parsed token in T                   |
+         *  | false | true  | parse error in E                    |
+         *
+         * @endcode
+         **/
+        class scan_result {
+        public:
+            using CharT = char;
+            using token_type = Token;
+            using span_type = span<const CharT>;
+            using error_type = TokenizerError;
+            using input_state_type = TkInputState;
+
+        public:
+            scan_result(const Token & token,
+                        const span_type & consumed,
+                        const TokenizerError & error = TokenizerError())
+                : token_{token}, consumed_{consumed}, error_{error} {}
+
+            static scan_result make_whitespace(const span_type & prefix_input);
+            static scan_result make_partial(const span_type & prefix_input);
+            /**
+             *  @p error_src can be __FUNCTION__ from site where error generated.
+             *  @p error_msg error message
+             *  @p error_pos error position, relative to start of token
+             *  @p input_state_ref input state object;
+             *  copied into scan_result, and leaving input_state_ref.current_line cleared
+             **/
+            static scan_result make_error_consume_current_line(const char * error_src,
+                                                               std::string error_msg,
+                                                               size_t error_pos,
+                                                               input_state_type & input_state_ref);
+
+            bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
+            bool is_token() const { return token_.is_valid(); }
+            bool is_error() const { return error_.is_error(); }
+
+            const Token & get_token() const { return token_; }
+            const span_type & consumed() const { return consumed_; }
+            const TokenizerError & error() const { return error_; }
+
+        public:
+            /** Successfully parsed token, whenever tk_type != tokentype::tk_invalid.
+             *  Will be tokentype::tk_invalid in the normal course of events for valid input,
+             *  when consuming whitespace
+             **/
+            token_type token_;
+            /** input span represented by @c token_, on success. Otherwise not defined **/
+            span_type consumed_;
+            /** error description, whenever .error_.is_error() is true **/
+            TokenizerError error_;
+        };
+
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end scan_result.hpp */
diff --git a/include/xo/tokenizer2/span.hpp b/include/xo/tokenizer2/span.hpp
new file mode 100644
index 00000000..8cf7a4a7
--- /dev/null
+++ b/include/xo/tokenizer2/span.hpp
@@ -0,0 +1,291 @@
+/** @file span.hpp **/
+
+#pragma once
+
+#include "xo/indentlog/scope.hpp"
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <ostream>
+#include <cstdint>
+#include <cassert>
+
+namespace xo {
+    namespace scm {
+        /** @class span span.hpp
+         *
+         *  @brief A contiguous range of characters,  without ownership.
+         *
+         *  @tparam CharT type for elements referred to by this span.
+         **/
+        template <typename CharT>
+        class span {
+        public:
+            /** @defgroup span-type-traits span type traits **/
+            ///@{
+
+            /** typealias for span size (in units of CharT) **/
+            using size_type = std::uint64_t;
+
+            ///@}
+
+        public:
+            /** @defgroup span-ctors span constructors **/
+            ///@{
+
+            /** null span **/
+            span() : lo_{nullptr}, hi_{nullptr} {}
+
+            /** Create span for the contiguous memory range [@p lo, @p hi) **/
+            span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}
+
+            /** implicit converting ctor from span<CharU> when CharU* converts to CharT* (e.g. adds const) **/
+            template<typename CharU>
+            span(const span<CharU> & other,
+                 std::enable_if_t<std::is_convertible_v<CharU*, CharT*>
+                 && !std::is_same_v<CharU, CharT>> * = nullptr)
+                : lo_{other.lo()}, hi_{other.hi()} {}
+
+            /** copy ctor + copy assignment, explicitly defaulted alongside the converting template ctor **/
+            span(const span & other) = default;
+            span & operator=(const span & other) = default;
+
+            /** Create a null span (i.e. with null @p lo, @p hi pointers)
+             *  A null span can be concatenated with any other span
+             *  without triggering matching-endpoint asserts.
+             **/
+            static span make_null() { return span(static_cast<CharT*>(nullptr), static_cast<CharT*>(nullptr)); }
+
+            /** @brief create span for C-style string @p cstr (gives a null span when @p cstr is null) **/
+            static span from_cstr(const CharT * cstr) {
+                CharT * lo = cstr;
+                CharT * hi = cstr ? cstr + strlen(cstr) : nullptr;
+
+                return span(lo, hi);
+            }
+
+            /** @brief create span over the characters of @p str;  span refers to storage held by @p str **/
+            static span from_string(const std::string& str) {
+                /* use data()/size(): dereferencing str.end() is undefined behavior */
+                CharT * lo = str.data();
+                CharT * hi = str.data() + str.size();
+                return span(lo, hi);
+            }
+
+            /** @brief concatenate two contiguous spans */
+            static span concat(const span & span1, const span & span2) {
+                if (span1.is_null())
+                    return span2;
+                if (span2.is_null())
+                    return span1;
+
+                if (span1.hi() != span2.lo()) {
+                    scope log(XO_DEBUG(true));
+
+                    log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
+                }
+
+                assert(span1.hi() == span2.lo());
+
+                CharT * lo = span1.lo();
+                CharT * hi = span2.hi();
+
+                return span(lo, hi);
+            }
+
+            ///@}
+
+            /** @defgroup span-access-methods **/
+            ///@{
+
+            CharT * lo() const { return lo_; } /* get member span::lo_ */
+            CharT * hi() const { return hi_; } /* get member span::hi_ */
+
+            ///@}
+
+            /** @defgroup span-general-methods **/
+            ///@{
+
+            /** @brief strip prefix until first occurrence of '\n', including the newline;  empty if none **/
+            void discard_until_newline() {
+                for (CharT * p = lo_; p < hi_; ++p) {
+                    if (*p == '\n') {
+                        lo_ = p + 1;
+                        return;
+                    }
+                }
+
+                lo_ = hi_;
+            }
+
+            /** Create new span over supplied type,
+             *  with identical (possibly misaligned) endpoints.
+             *
+             *  @warning
+             *  1. New span uses exactly the same memory addresses.
+             *     Endpoint pointers may not be aligned.
+             *  2. Implementation assumes code compiled with
+             *     @code -fno-strict-aliasing @endcode enabled.
+             *
+             *  @tparam OtherT element type for new span
+             **/
+            template <typename OtherT>
+            span<OtherT>
+            cast() const { return span<OtherT>(reinterpret_cast<OtherT *>(lo_),
+                                               reinterpret_cast<OtherT *>(hi_)); }
+
+            /** @brief create span including the first @p z members of this span (unchecked: needs z <= size()) **/
+            span prefix(size_type z) const { return span(lo_, lo_ + z); }
+
+            /** @brief create span for prefix up to (but not including) @p *p;  @p p is clamped to [lo, hi] **/
+            span prefix_upto(CharT * p) const {
+                if (p < lo_)
+                    p = lo_;
+                if (p > hi_)
+                    p = hi_;
+                return span(lo_, p);
+            }
+
+            /** @brief create span with first @p z members of this span removed (all of them if z > size()) **/
+            span after_prefix(size_type z) const {
+                if (z > size())
+                    z = size();
+
+                return span(lo_ + z, hi_);
+            }
+
+            /** @brief create span with @p prefix of this span removed **/
+            span after_prefix(const span & prefix) const {
+                if (!prefix.is_null() && (prefix.lo() != lo_)) {
+                    throw std::runtime_error
+                        ("after_prefix: expected prefix of this span");
+                }
+
+                return after_prefix(prefix.size());
+            }
+
+            /** Create span starting with position @p p.
+             *  Does boundary checking; will return empty span if @p p is outside @c [lo_,hi_]
+             **/
+            span suffix_from(CharT * p) const {
+                if ((lo_ <= p) && (p <= hi_))
+                    return span(p, hi_);
+                else
+                    return span(hi_, hi_);
+            }
+
+            /** true iff this span is null.  distinct from empty. **/
+            bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
+            /** true iff this span is empty (comprises 0 elements). **/
+            bool empty() const { return lo_ == hi_; }
+            /** report the number of elements (of type CharT) in this span. **/
+            size_type size() const { return hi_ - lo_; }
+
+            /** increase extent of this span to include @p x.
+             *  Requires @c hi() == @c x.lo(), unless @p x is null (then no change)
+             **/
+            span & operator+=(const span & x) {
+                if (hi_ == x.lo_) {
+                    hi_ = x.hi_;
+                } else if (!x.is_null()) {
+                    assert(false);
+                }
+
+                return *this;
+            }
+
+            /** print representation for this span on stream @p os **/
+            void print(std::ostream & os) const {
+                os << "<span"
+                   << xtag("addr", (void*)lo_)
+                   << xtag("size", size())
+                   << " :text " << xo::print::quot(std::string_view(lo_, hi_))
+                   << ">";
+            }
+            ///@}
+
+        private:
+            /** @defgroup span-instance-vars **/
+            ///@{
+
+            /** start of span.
+                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
+            **/
+            CharT * lo_ = nullptr;
+
+            /** @brief end of span.
+                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
+            **/
+            CharT * hi_ = nullptr;
+
+            ///@}
+        }; /*span*/
+
+        /** @defgroup span-operators **/
+        ///@{
+
+        /** compare spans for equality.
+         *  Two spans are equal iff both endpoint addresses match (pointed-to characters are not compared).
+         **/
+        template <typename CharT>
+        inline bool
+        operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
+            return ((lhs.lo() == rhs.lo())
+                    && (lhs.hi() == rhs.hi()));
+        }
+
+        /** compare spans for inequality.
+         *  Two spans are unequal if either endpoint address differs (pointed-to characters are not compared).
+         **/
+        template <typename CharT>
+        inline bool
+        operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
+            return ((lhs.lo() != rhs.lo())
+                    || (lhs.hi() != rhs.hi()));
+        }
+
+        /** print a summary of @p x on stream @p os (forwards to span::print). Intended for diagnostics **/
+        template <typename CharT>
+        inline std::ostream &
+        operator<<(std::ostream & os,
+                   const span<CharT> & x) {
+            x.print(os);
+            return os;
+        }
+
+        ///@}
+    } /*namespace scm*/
+
+    namespace print {
+        template <typename CharT>
+        class printspan_impl {
+        public:
+            printspan_impl(xo::scm::span<CharT> x) : span_{x} {}
+
+            xo::scm::span<CharT> span_;
+        };
+
+        template <typename CharT>
+        printspan_impl<CharT> printspan(const xo::scm::span<CharT>& span) {
+            return printspan_impl<CharT>(span);
+        }
+
+        template <typename CharT>
+        inline std::ostream &
+        operator<< (std::ostream & os, const printspan_impl<CharT> & x)
+        {
+            /* write span contents verbatim, one element at a time */
+            const CharT * cursor = x.span_.lo();
+            while (cursor < x.span_.hi())
+                os << *cursor++;
+            return os;
+        }
+
+#ifndef ppdetail_atomic
+        template <typename CharT>        \
+        PPDETAIL_ATOMIC_BODY(printspan_impl<CharT>);
+
+        template <typename CharT>        \
+        PPDETAIL_ATOMIC_BODY(xo::scm::span<CharT>);
+#endif
+
+    }
+} /*namespace xo*/
diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
new file mode 100644
index 00000000..eeeb7dd0
--- /dev/null
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -0,0 +1,192 @@
+/** @file tokentype.hpp
+ *
+ *  author: Roland Conybeare, Jul 2024
+ **/
+
+#pragma once
+
+#include "xo/indentlog/print/tag.hpp" // for STRINGIFY
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <ostream>
+
+namespace xo {
+    namespace scm {
+        /** @enum tokentype
+         *  Enum to identify different schematika input token types
+         *
+         *  Schematika code examples:
+         *
+         *  @code
+         *    type point :: { xcoord : f64, ycoord : f64 };
+         *    type matrix :: array<double, 2>;  // 2-d array
+         *
+         *    decl hypot(x : f64, y : f64) -> f64;
+         *
+         *    def hypot(x : f64, y : f64) {
+         *      let
+         *        x2 = (x * x);
+         *        y2 = (y * y);
+         *        hypot2 = (x2 + y2);
+         *      in
+         *        sqrt(hypot2);
+         *    };
+         *
+         *    def someconst 4;
+         *
+         *    def foo(v : vec<i32>) {
+         *      def (pi : f64) = 3.1415926;
+         *      def (h : (f64,f64) -> f64) = hypot;
+         *
+         *      h = hypot3;
+         *    };
+         *
+         *    def matrixproduct(x : matrix, y : matrix) {
+         *      [i, j : x.row(i) * y.col(j)];
+         *    };
+         *  @endcode
+         **/
+        enum class tokentype {
+            /** sentinel value: not a valid token type **/
+            tk_invalid = -1,
+
+            /** a boolean constant **/
+            tk_bool,
+
+            /** an integer constant (signed 64-bit integer) **/
+            tk_i64,
+
+            /** a 64-bit floating-point constant **/
+            tk_f64,
+
+            /** a string literal **/
+            tk_string,
+
+            /** a symbol **/
+            tk_symbol,
+
+            /** left-hand parenthesis @c '(' **/
+            tk_leftparen,
+
+            /** right-hand parenthesis @c ')' **/
+            tk_rightparen,
+
+            /** left-hand bracket @c '[' **/
+            tk_leftbracket,
+
+            /** right-hand bracket @c ']' **/
+            tk_rightbracket,
+
+            /** left-hand brace @c '{' **/
+            tk_leftbrace,
+
+            /** right-hand brace @c '}' **/
+            tk_rightbrace,
+
+            /** left-hand angle bracket @c '<' **/
+            tk_leftangle,
+
+            /** right-hand angle bracket @c '>' **/
+            tk_rightangle,
+
+            /** less-equal @c '<=' **/
+            tk_lessequal,
+
+            /** greater-equal @c '>=' **/
+            tk_greatequal,
+
+            /** dot @c '.' **/
+            tk_dot,
+
+            /** comma @c ',' **/
+            tk_comma,
+
+            /** colon @c ':' **/
+            tk_colon,
+
+            /** double-colon @c '::' **/
+            tk_doublecolon,
+
+            /** semi-colon @c ';' **/
+            tk_semicolon,
+
+            /** single equals sign @c '=' **/
+            tk_singleassign,
+
+            /** assignment @c ':=' **/
+            tk_assign,
+
+            /** yields (arrow) @c '->', as in @c '(f64,f64) -> f64' **/
+            tk_yields,
+
+            /** note: operators not treated as punctuation
+             *  'do-always' is a legal variable name,
+             *  as is 'maybe*2', 'maybe+1', 'path/to/foo'
+             **/
+
+            /** operator @c '+' **/
+            tk_plus,
+            /** operator @c '-' **/
+            tk_minus,
+            /** operator @c '*' **/
+            tk_star,
+            /** operator @c '/' **/
+            tk_slash,
+
+            /** operator @c '==' **/
+            tk_cmpeq,
+            /** operator @c '!=' **/
+            tk_cmpne,
+
+            /** keyword @c 'type' **/
+            tk_type,
+
+            /** keyword @c 'def' **/
+            tk_def,
+
+            /** keyword @c 'lambda' **/
+            tk_lambda,
+
+            /** keyword @c 'if' **/
+            tk_if,
+
+            /** keyword @c 'then' **/
+            tk_then,
+
+            /** keyword @c 'else' **/
+            tk_else,
+
+            /** keyword @c 'let' **/
+            tk_let,
+
+            /** keyword @c 'in' **/
+            tk_in,
+
+            /** keyword @c 'end' **/
+            tk_end,
+
+            /** counts number of entries (not counting tk_invalid, which is -1) **/
+            n_tokentype
+        }; /*tokentype*/
+
+        /** String representation for enum value.
+         *  For example @c tokentype_descr(tokentype::tk_if) -> @c "if"
+         **/
+        extern char const *
+        tokentype_descr(tokentype tk_type);
+
+        /** Write the description string for @p tk_type (see tokentype_descr) on stream @p os **/
+        inline std::ostream &
+        operator<< (std::ostream & os, tokentype tk_type) {
+            char const * const descr = tokentype_descr(tk_type);
+            return (os << descr);
+        }
+    } /*namespace scm*/
+
+#ifndef ppdetail_atomic
+    namespace print {
+        PPDETAIL_ATOMIC(xo::scm::tokentype);
+    } /*namespace print*/
+#endif
+} /*namespace xo*/
+
+/* end tokentype.hpp */
diff --git a/src/tokenizer2/CMakeLists.txt b/src/tokenizer2/CMakeLists.txt
new file mode 100644
index 00000000..967535e2
--- /dev/null
+++ b/src/tokenizer2/CMakeLists.txt
@@ -0,0 +1,15 @@
+# tokenizer2/CMakeLists.txt -- build rules for shared library xo_tokenizer2
+
+set(SELF_LIB xo_tokenizer2)
+set(SELF_SRCS
+    Tokenizer.cpp
+    TokenizerError.cpp
+    TkInputState.cpp
+    scan_result.cpp
+    Token.cpp
+    tokentype.cpp)
+
+xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
+xo_dependency(${SELF_LIB} indentlog)  # sources include xo/indentlog/print/*.hpp
+
+# end CMakeLists.txt
diff --git a/src/tokenizer2/TkInputState.cpp b/src/tokenizer2/TkInputState.cpp
new file mode 100644
index 00000000..30db1dbb
--- /dev/null
+++ b/src/tokenizer2/TkInputState.cpp
@@ -0,0 +1,151 @@
+/** @file TkInputState.cpp
+ *
+ *  @author Roland Conybeare, Jun 2025
+ **/
+
+#include "TkInputState.hpp"
+
+namespace xo {
+    namespace scm {
+        using CharT = char;
+
+        bool
+        TkInputState::is_newline(CharT ch) { /* true iff ch is the line-feed char '\n' */
+            return (ch == '\n');
+        }
+
+        bool
+        TkInputState::is_whitespace(CharT ch) {
+            /* whitespace chars: space, horizontal tab, line feed, carriage return.
+             * any other char is not whitespace
+             */
+
+            const bool blank = (ch == ' ') || (ch == '\t');
+            const bool line_end = (ch == '\n') || (ch == '\r');
+
+            return (blank || line_end);
+        }
+
+        TkInputState
+        TkInputState::rewind(std::size_t n) const
+        {
+            /* new state on same line, backed up n chars (clamped at position 0), with whitespace 0 */
+            const auto rewound_pos = (n <= current_pos_) ? current_pos_ - n : 0;
+            return TkInputState(this->current_line_, rewound_pos, 0 /*whitespace*/);
+        }
+
+        void
+        TkInputState::advance(size_t z) /* move read position forward by z chars */
+        {
+            scope log(XO_DEBUG(debug_flag_));
+
+            this->current_pos_ += z; /* note: not checked against end of current line */
+
+            log && log(xtag("z", z), xtag("current_pos", current_pos_));
+        }
+
+        void
+        TkInputState::advance_until(const CharT * pos) /* set read position to pos, which must lie within current line */
+        {
+            scope log(XO_DEBUG(debug_flag_));
+
+            assert(current_line_.lo() <= pos && pos <= current_line_.hi()); /* pos == hi() (end of line) is allowed */
+
+            this->current_pos_ = pos - current_line_.lo(); /* stored as offset from start of line */
+
+            log && log(xtag("current_pos", current_pos_));
+        }
+
+        auto
+        TkInputState::consume_current_line() -> span_type
+        {
+            /* snapshot stashed line first; discard_current_line() resets it */
+            span_type consumed = current_line_;
+
+            discard_current_line();
+            return consumed;
+        }
+
+        void
+        TkInputState::discard_current_line()
+        {   /* forget stashed line: null span, position 0, whitespace count 0 */
+            whitespace_ = 0;
+            current_pos_ = 0;
+            current_line_ = span_type::make_null();
+        }
+
+        auto
+        TkInputState::capture_current_line(const span_type & input,
+                                           bool eof_flag)
+            -> std::pair<input_error, span_type>
+        {
+            // Stash first line of input (up to and including '\n') as current line.
+            // Returns {ok, line} on success; {incomplete, input.prefix(0)} when input
+            // has no '\n' and eof_flag is false. Must capture entirety of line, including
+            // leading whitespace. See also discard_current_line(), tokenizer scan() method
+
+            scope log(XO_DEBUG(debug_flag_));
+
+            /* look ahead to {end of line, end of input}, whichever comes first */
+            const CharT * sol = input.lo();
+            const CharT * eol = sol;
+
+            if (sol == current_line_.lo()) {
+                log && log("short-circuit - current line already stashed");
+
+                /* nothing to do here */
+                return std::make_pair(input_error::ok, current_line_);
+            }
+
+            while ((eol < input.hi()) && (*eol != '\n'))
+                ++eol;
+            /* eol == input.hi() when no newline was found: test bound before dereferencing */
+            if ((eol < input.hi()) && (*eol == '\n')) {
+                /* include \n at end-of-line */
+                ++eol;
+            } else {
+                if (!eof_flag) {
+                    /* caller expected to provide complete line of input. complain and ignore */
+                    return std::make_pair(input_error::incomplete,
+                                          input.prefix(0ul));
+                }
+            }
+
+            this->current_line_ = span_type(sol, eol);
+            this->current_pos_ = 0;
+            this->whitespace_ = 0;
+
+            log && log(xtag("current_line", print::printspan(current_line_)),
+                       xtag("current_pos", current_pos_));
+
+            return std::make_pair(input_error::ok,
+                                  span_type(sol, eol));
+        }
+
+        const CharT *
+        TkInputState::skip_leading_whitespace()
+        {
+            scope log(XO_DEBUG(debug_flag_));
+            /* advance past whitespace at current position; returns pointer to first non-whitespace char (or end of line) */
+            const CharT * ix = current_line_.lo() + current_pos_;
+
+            this->whitespace_ = 0;
+
+            /* skip whitespace, counting skipped chars. bound is tested before *ix is read */
+            while ((ix < current_line_.hi()) && is_whitespace(*ix)) {
+                ++ix;
+
+                ++(this->whitespace_);
+            }
+
+            this->tk_start_ = ix - current_line_.lo();
+            this->current_pos_ = ix - current_line_.lo();
+
+            return ix;
+        }
+
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TkInputState.cpp */
diff --git a/src/tokenizer2/Token.cpp b/src/tokenizer2/Token.cpp
new file mode 100644
index 00000000..f228d56e
--- /dev/null
+++ b/src/tokenizer2/Token.cpp
@@ -0,0 +1,259 @@
+/** @file Token.cpp
+ *
+ *  author: Roland Conybeare
+ **/
+
+#include "Token.hpp"
+#include "xo/indentlog/print/tag.hpp"
+
+namespace xo {
+    namespace scm {
+
+        bool
+        Token::bool_value() const
+        {
+            /* only tk_bool tokens carry a boolean value */
+            if (tokentype::tk_bool != tk_type_) {
+                throw (std::runtime_error
+                       (tostr("token::bool_value",
+                              ": token with type tk found where tk_bool expected",
+                              xtag("tk", tk_type_))));
+            }
+
+            /* token text must be exactly "true" or "false" */
+            const bool is_true = (text_ == "true");
+            if (!is_true && !(text_ == "false")) {
+                throw (std::runtime_error
+                       (tostr("token::bool_value",
+                              ": unexpected input string tk_bool token",
+                              xtag("text", text_))));
+            }
+
+            return is_true;
+        }
+
+        std::int64_t
+        Token::i64_value() const /* parse text_ as optionally-signed decimal integer; throws std::runtime_error on wrong token type or malformed text */
+        {
+            if (tk_type_ != tokentype::tk_i64) {
+                throw (std::runtime_error
+                       (tostr("token::i64_value",
+                              ": token with type tk found where tk_i64 expected",
+                              xtag("tk", tk_type_))));
+            }
+
+            if (text_.empty()) {
+                throw (std::runtime_error
+                       (tostr("token::i64_value",
+                              ": unexpected empty input string for tk_i64 token")));
+            }
+            /* accumulate in 64 bits to match return type. NOTE: magnitudes beyond int64 range are still not detected */
+            int sign = 1;
+            std::int64_t value = 0;
+            {
+                auto ix = text_.begin();
+                auto end_ix = text_.end();
+
+                char ch = *ix;
+
+                if (ch == '+') {
+                    ++ix;
+                } else if (ch == '-') {
+                    sign = -1;
+                    ++ix;
+                }
+
+                if (ix == end_ix) {
+                    throw (std::runtime_error
+                           (tostr("token::i64_value",
+                                  ": input text found where at least one digit expected",
+                                  xtag("text", text_))));
+                }
+
+                for (; ix != end_ix; ++ix) {
+                    char ch = *ix;
+
+                    if ((ch >= '0') && (ch <= '9')) {
+                        value *= 10;
+                        value += (ch - '0');
+                    } else {
+                        throw (std::runtime_error
+                               (tostr("token::i64_value",
+                                      ": unexpected char ch in integer token",
+                                      xtag("ch", ch))));
+                    }
+                }
+            }
+
+            return sign * value;
+        } /*i64_value*/
+
+        double
+        Token::f64_value() const /* parse text_ as decimal floating-point literal; throws std::runtime_error on wrong token type or malformed text */
+        {
+            if (tk_type_ != tokentype::tk_f64) {
+                throw (std::runtime_error
+                       (tostr("token::f64_value",
+                              ": token with type tk found where tk_f64 expected",
+                              xtag("tk", tk_type_))));
+            }
+
+            if (text_.empty()) {
+                throw (std::runtime_error
+                       (tostr("token::f64_value",
+                              ": unexpected empty input string for tk_f64 token")));
+            }
+
+            int sign = 1;
+            /* integer representing denormalized unsigned mantissa
+             * (mantissa scaled by smallest power of 10 sufficient to make
+             *  it an integer). NOTE: overflows if text has more digits than int64 holds
+             */
+            std::int64_t mantissa = 0;
+            /* counts #of digits to the right of decimal point '.' */
+            int rh_digits = 0;
+            /* sign of exponent */
+            int exp_sign = 1;
+            /* value of exponent = integer to the right of 'e' or 'E' */
+            int exponent = 0;
+
+            /* floating-point value will represent
+             *   sign * mantissa * 10^(exp_sign*exponent - rh_digits)
+             */
+            {
+                auto ix = text_.begin();
+                auto end_ix = text_.end();
+
+                char ch = *ix;
+
+                if (ch == '+') {
+                    ++ix;
+                } else if (ch == '-') {
+                    sign = -1;
+                    ++ix;
+                }
+
+                if (ix == end_ix) {
+                    throw (std::runtime_error
+                           (tostr("token::f64_value",
+                                  ": input text found where at least one digit expected",
+                                  xtag("text", text_))));
+                }
+
+                /* true iff decimal point '.' present in mantissa */
+                bool have_decimal_point = false;
+                /* true iff exponent prefix 'e' or 'E' present */
+                //bool have_exponent = false;
+                /* counts number of digits in mantissa
+                 * (both before and after, but not including, any decimal point)
+                 */
+                int m_digits = 0;
+                /* digits to the left of decimal point */
+                int lh_digits = 0;
+
+                /* loop over mantissa digits */
+                for (; ix != end_ix; ++ix) {
+                    char ch = *ix;
+
+                    if (ch == '.') {
+                        if (have_decimal_point) {
+                            throw (std::runtime_error
+                                   (tostr("token::f64_value",
+                                          ": input text found where at most one decimal point expected",
+                                          xtag("text", text_))));
+                        }
+
+                        have_decimal_point = true;
+                        lh_digits = m_digits;
+                    } else if ((ch >= '0') && (ch <= '9')) {
+                        mantissa *= 10;
+                        mantissa += (ch - '0');
+                        ++m_digits;
+                    } else if (ch == 'e' || ch == 'E') {
+                        //have_exponent = true;
+                        break; // done with mantissa
+                    } else {
+                        throw (std::runtime_error
+                               (tostr("token::f64_value",
+                                      ": unexpected char ch in floating-point token",
+                                      xtag("ch", ch))));
+                    }
+                }
+
+                if (have_decimal_point)
+                    rh_digits = m_digits - lh_digits;
+
+                if (ix != end_ix) {
+                    /* continue to read exponent */
+
+                    /* skip e|E */
+                    ++ix;
+
+                    if (ix == end_ix) {
+                        throw (std::runtime_error
+                               (tostr("token::f64_value",
+                                      ": on input text, expect at least one digit following exponent marker e|E",
+                                      xtag("text", text_))));
+                    }
+
+                    char ch = *ix;
+
+                    if (ch == '+') {
+                        ++ix; /*skip*/
+                    } else if (ch == '-') {
+                        exp_sign = -1;
+                        ++ix;
+                    }
+
+                    for (; ix != end_ix; ++ix) {
+                        char ch = *ix;
+
+                        if ((ch >= '0') && (ch <= '9')) {
+                            exponent *= 10;
+                            exponent += (ch - '0');
+                        } else {
+                            throw (std::runtime_error
+                                   (tostr("token::f64_value",
+                                          "; on input text, expect only digits following"
+                                          " (possibly signed) exponenct marker",
+                                          xtag("text", text_))));
+                        }
+                    }
+                }
+            }
+
+            /* floating-point value will represent
+             *   sign * mantissa * 10^(exp_sign*exponent - rh_digits)
+             */
+
+            double mantissa_f64 = sign * mantissa;
+
+#ifdef OBSOLETE_DEBUG
+            std::cerr << xtag("text", text_)
+                      << xtag("rh_digits", rh_digits)
+                      << xtag("mantissa_f64", mantissa_f64)
+                      << xtag("exp_sign", exp_sign)
+                      << xtag("exponent", exponent)
+                      << std::endl;
+#endif
+
+            double retval = (mantissa_f64
+                             * detail::pow10((exp_sign * exponent)
+                                             - rh_digits));
+
+            return retval;
+        } /*f64_value*/
+
+        void
+        Token::print(std::ostream & os) const
+        {
+            /* writes "<token", the type tag, the text tag (only when has_variable_text()), then ">" */
+            os << "<token";
+            os << xtag("type", tk_type_);
+            if (has_variable_text()) { os << xtag("text", text_); }
+            os << ">";
+        } /*print*/
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end Token.cpp */
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
new file mode 100644
index 00000000..00ef4eec
--- /dev/null
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -0,0 +1,836 @@
+/** @file Tokenizer.cpp
+ *
+ *  @author Roland Conybeare, Jul 2024
+ **/
+
+#include "Tokenizer.hpp"
+
+namespace xo {
+    namespace scm {
+        Tokenizer::Tokenizer(bool debug_flag) /* debug_flag is passed through to the input state */
+            : input_state_{debug_flag}
+        {}
+
+        void
+        Tokenizer::discard_current_line()
+        {
+            input_state_.discard_current_line(); /* delegate to input state */
+        }
+
+        bool
+        Tokenizer::is_1char_punctuation(CharT ch) /* true iff ch is one of: ( ) [ ] { } , ; */
+        {
+            switch(ch) {
+            case '(':
+                return true;
+            case ')':
+                return true;
+            case '[':
+                return true;
+            case ']':
+                return true;
+            case '{':
+                return true;
+            case '}':
+                return true;
+            case '<':
+                /* can't be 1char punctuation -- can begin lessequal token '<=' */
+                return false;
+            case '>':
+                /* can't be 1char punctuation -- can begin greatequal token '>=',
+                 * and appears in tk_yields token '->'
+                 */
+                return false;
+            case ',':
+                return true;
+            case ';':
+                return true;
+            case ':':
+                /* can't be 1char punctuation -- can begin assignment token ':=' */
+                return false;
+            case '=':
+                /* can't be 1char punctuation -- can begin comparison token '==' */
+                return false;
+            case '!':
+                /* can't be 1char punctuation -- can begin comparison token '!=' */
+                return false;
+            case '-':
+                /* can't be punctuation
+                 * - can appear inside f64 token: e.g. 1.23e-9.
+                 * - begins tk_yields token: ->
+                 */
+                return false;
+            case '+':
+                /* can't be punctuation -- can appear inside f64 token: e.g. 1.23e+4 */
+                return false;
+            case '*':
+                /* not punctuation -- allowed in symbol */
+                return false;
+            case '/':
+                /* not punctuation -- for symmetry with +,- */
+                return false;
+            case '.':
+                /* can't be punctuation -- can appear inside f64 token: e.g. 1.23 */
+                return false;
+            }
+
+            return false; /* any other char: not 1-char punctuation */
+        }
+
+        bool
+        Tokenizer::is_2char_punctuation(CharT ch) /* true iff ch is one of: < > : = ! */
+        {
+            /* can't put '-' here, because of the way it appears in numeric literals.
+             * characters listed here may not appear in symbol names
+             */
+
+            switch(ch) {
+            case '<':
+                /* can begin <= */
+                return true;
+            case '>':
+                /* can begin >= */
+                return true;
+            case ':':
+                /* can begin := */
+                return true;
+            case '=':
+                /* can begin == */
+                return true;
+            case '!':
+                /* can begin != */
+                return true;
+            }
+
+            return false; /* any other char: does not begin a 2-char punctuation token */
+        }
+
+        auto
+        Tokenizer::assemble_token(std::size_t initial_whitespace,
+                                  const span_type & token_text,
+                                  input_state_type * p_input_state) -> result_type
+        {
+            /* literal|pretty|streamlined */
+            log_config::style = function_style::streamlined;
+
+            scope log(XO_DEBUG(p_input_state->debug_flag()));
+            log && log(xtag("token_text", token_text),
+                       xtag("initial_whitespace", initial_whitespace),
+                       xtag("input_state", *p_input_state));
+
+            tokentype tk_type = tokentype::tk_invalid;
+            std::string tk_text;
+
+            const CharT * tk_start = token_text.lo();
+            const CharT * tk_end = token_text.hi();
+
+            const CharT * ix = tk_start;
+
+            /* switch here applies to the first character in a token */
+            switch (*ix) {
+            case '-':
+            case '+':
+                if (token_text.size() == 1) {
+                    /* standalone '+' or '-' */
+                    if (*ix == '+')
+                        tk_type = tokentype::tk_plus;
+                    else if(*ix == '-')
+                        tk_type = tokentype::tk_minus;
+                }
+
+                /** fall through to numeric literal code below **/
+                [[fallthrough]];
+            case '.':
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+            {
+                /* examples of valid floating-point numbers:
+                 *   .0
+                 *   1e0
+                 *   1e
+                 *   0.
+                 *   +1e0
+                 *   -1e0
+                 *   +1E+2
+                 *   -1E+2
+                 *   -0.123e-10
+                 * non-examples:
+                 *   .
+                 *   -
+                 *   +
+                 *   e0
+                 *   .e0
+                 *   -.e-0
+                 *   +.e+0
+                 *
+                 * in particular: to be recognized as a number,
+                 * must contain at least one digit
+                 */
+
+                log && log("possible number-token");
+
+                /* true if initial sign -/+ encountered */
+                bool sign_flag = false;
+                /* true if '.' encountered */
+                bool period_flag = false;
+                /* true if 'e' | 'E' encountered.
+                 */
+                bool exponent_flag = false;
+                /* true when sign '-' | '+' precedes exponenct digits */
+                bool exponent_sign_flag = false;
+                /* true when at least one digit follows exponent marker */
+                bool exponent_digit_flag = false;
+                /* true if at least one digit encountered */
+                bool number_flag = false;
+
+                log && log(xtag("*ix", *ix),
+                           xtag("tk.length", token_text.size()));
+                if (log && (ix + 1 < tk_end))
+                    log(xtag("*(ix+1)", *(ix + 1)));
+
+                if ((*ix == '-') && (ix + 2 == token_text.hi()) && (*(ix + 1) == '>')) {
+                    /* composing exactly '->' */
+                    tk_type = tokentype::tk_yields;
+                } else {
+                    /* token (if valid) will be one of: {tk_i64, tk_f64, tk_dot}: */
+                    for (; ix != token_text.hi(); ++ix) {
+                        if ((*ix == '-') || (*ix == '+')) {
+                            /* sign allowed:
+                             * 1. before period and before first digit
+                             * 2. after exponent
+                             */
+                            if (!period_flag && !number_flag && !sign_flag) {
+                                sign_flag = true;
+                            } else if (exponent_flag && !exponent_digit_flag) {
+                                exponent_sign_flag = true;
+                            } else {
+                                return result_type::make_error_consume_current_line
+                                    (__FUNCTION__ /*src_function*/,
+                                     "improperly placed sign indicator",
+                                     (ix - tk_start),
+                                     *p_input_state);
+                            }
+                        } else if (*ix == '.') {
+                            if (period_flag) {
+                                return result_type::make_error_consume_current_line
+                                    (__FUNCTION__ /*src_function*/,
+                                     "duplicate decimal point in numeric literal",
+                                     (ix - tk_start),
+                                     *p_input_state);
+                            }
+
+                            period_flag = true;
+                        } else if ((*ix == 'e') || (*ix == 'E')) {
+                            if (exponent_flag) {
+                                return result_type::make_error_consume_current_line
+                                    (__FUNCTION__ /*src_function*/,
+                                     "duplicate exponent marker in numeric literal",
+                                     (ix - tk_start),
+                                     *p_input_state);
+                            }
+
+                            exponent_flag = true;
+                        } else if (isdigit(*ix)) {
+                            if (exponent_flag) {
+                                /* need digit before exponent to recognize as number */
+                                exponent_digit_flag = true;
+                            } else {
+                                number_flag = true;
+                            }
+                        } else {
+                            return result_type::make_error_consume_current_line
+                                (__FUNCTION__ /*src_function*/,
+                                 "unexpected character in numeric constant" /*error_description*/,
+                                 (ix - tk_start),
+                                 *p_input_state);
+                        }
+                    }
+
+                    if (number_flag) {
+                        if (period_flag || exponent_flag) {
+                            tk_type = tokentype::tk_f64;
+                        } else {
+                            tk_type = tokentype::tk_i64;
+                        }
+                    } else if (period_flag && !exponent_flag) {
+                        tk_type = tokentype::tk_dot;
+                    } else {
+                        /* not a valid token */
+                    }
+
+                    log && log(xtag("sign_flag", sign_flag));
+                    log && log(xtag("period_flag", period_flag),
+                               xtag("exponent_flag", exponent_flag),
+                               xtag("exponent_sign_flag", exponent_sign_flag),
+                               xtag("number_flag", number_flag));
+                    log && log(xtag("tk_type", tk_type));
+                }
+
+                break;
+            }
+            case '*':
+                if (token_text.size() == 1) {
+                    /* standalone '*' */
+                    tk_type = tokentype::tk_star;
+                    ++ix;
+                } else {
+                    /* '*' isn't punctuation -- but may allow appearance in a longer token
+                     *
+                     * thinking that x*y is a symbol with an embedded '*' character;
+                     * in particular want to support kebab-case symbols like 'foo-config'
+                     */
+                }
+                break;
+            case '/':
+                if (token_text.size() == 1) {
+                    /* standalone '/' */
+                    tk_type = tokentype::tk_slash;
+                    ++ix;
+                }
+                break;
+            case '=':
+                log && log("singleassign or cmpeq token");
+
+                if (*(ix + 1) == '=') {
+                    tk_type = tokentype::tk_cmpeq;
+                    ++ix;
+                    ++ix;
+                } else {
+                    /* standalone '=' */
+                    tk_type = tokentype::tk_singleassign;
+                    ++ix;
+                }
+                break;
+            case '!':
+                if (*(ix + 1) == '=') {
+                    tk_type = tokentype::tk_cmpne;
+                    ++ix;
+                    ++ix;
+                } else {
+                    /* standlone '!' */
+
+                    // TODO
+                }
+                break;
+            case '"':
+            {
+                log && log("recognize string-token");
+
+                tk_type = tokentype::tk_string;
+
+                tk_text.reserve(token_text.hi() - token_text.lo());
+
+                ++ix; /*skip initial " char*/
+
+                /* true on final " */
+                bool endofstring = false;
+
+                for (; ix != token_text.hi(); ++ix) {
+                    log && log(xtag("*ix", *ix));
+
+                    switch(*ix) {
+                    case '"':
+                        endofstring = true;
+
+                        /* skip final " char, don't capture */
+                        ++ix;
+
+                        break;
+                    case '\\':
+                        /* skip escape char, don't capture */
+                        ++ix;
+
+                        if (ix == token_text.hi()) {
+                            return result_type::make_error_consume_current_line
+                                (__FUNCTION__ /*src_function*/,
+                                 "expecting key following escape character \\",
+                                 (ix - tk_start),
+                                 *p_input_state);
+                        }
+
+                        switch(*ix) {
+                        case '\\':
+                            log && log(xtag("*ix", *ix), xtag("escaped", "t"));
+                            tk_text.push_back(*ix);
+                            break;
+                        case 'n':
+                            log && log(xtag("*ix", *ix), xtag("newline", "t"));
+                            tk_text.push_back('\n');
+                            break;
+                        case 't':
+                            log && log(xtag("*ix", *ix), xtag("tab", "t"));
+                            tk_text.push_back('\t');
+                            break;
+                        case 'r':
+                            log && log(xtag("*ix", *ix), xtag("cr", "t"));
+                            tk_text.push_back('\r');
+                            break;
+                        case '"':
+                            log && log(xtag("*ix", *ix), xtag("quote", "t"));
+                            tk_text.push_back('"');
+                            break;
+                        default:
+                            return result_type::make_error_consume_current_line
+                                (__FUNCTION__ /*src_function*/,
+                                 "expecting one of n|r|\"|\\ following escape \\",
+                                 (ix - tk_start),
+                                 *p_input_state);
+                        }
+                        break;
+                    default:
+                        tk_text.push_back(*ix);
+                        break;
+                    }
+
+                    if (endofstring)
+                        break;
+                }
+
+                if (!endofstring) {
+                    return result_type::make_error_consume_current_line
+                        (__FUNCTION__ /*src_function*/,
+                         "missing terminating '\"' to complete literal string",
+                         (ix - tk_start),
+                         *p_input_state);
+                }
+
+                log && log(tostr("tokenizer::assemble_token",
+                                 xtag("tk_text", tk_text)));
+
+                break;
+            }
+            case 'a': case 'A':
+            case 'b': case 'B':
+            case 'c': case 'C':
+            case 'd': case 'D':
+            case 'e': case 'E':
+            case 'f': case 'F':
+            case 'g': case 'G':
+            case 'h': case 'H':
+            case 'i': case 'I':
+            case 'j': case 'J':
+            case 'k': case 'K':
+            case 'l': case 'L':
+            case 'm': case 'M':
+            case 'n': case 'N':
+            case 'o': case 'O':
+            case 'p': case 'P':
+            case 'q': case 'Q':
+            case 'r': case 'R':
+            case 's': case 'S':
+            case 't': case 'T':
+            case 'u': case 'U':
+            case 'v': case 'V':
+            case 'w': case 'W':
+            case 'x': case 'X':
+            case 'y': case 'Y':
+            case 'z': case 'Z':
+            {
+                /* symbol/identifier must begin with a letter?
+                 * we want to accept some other chars too.
+                 * specifically want to allow identifiers:
+                 *   this-is-the-way
+                 *   this+is+also+the+way
+                 *   how/much/is/that/doggy
+                 *   put*an*asterisk*in*that
+                 *   something%special%
+                 *
+                 * like pure lisp,  we don't allow:
+                 * - identifier beginning with digit
+                 * - period .
+                 *
+                 * unlike pure lisp,  we don't allow anywhere in a symbol:
+                 * - colon     :
+                 * - semicolon ;
+                 * - comma     ,
+                 *
+                 * also we don't allow symbols to begin with special chars
+                 */
+
+                tk_type = tokentype::tk_symbol;
+                break;
+            }
+            case '<':
+            {
+                log && log("leftangle or lessequal token");
+
+                if (*(ix + 1) == '=') {
+                    tk_type = tokentype::tk_lessequal;
+                    ++ix;
+                    ++ix;
+                } else {
+                    tk_type = tokentype::tk_leftangle;
+                    ++ix;
+                }
+                break;
+            }
+            case '>':
+            {
+                log && log("rightangle or greatequal token");
+
+                if (*(ix + 1) == '=') {
+                    tk_type = tokentype::tk_greatequal;
+                    ++ix;
+                    ++ix;
+                } else {
+                    tk_type = tokentype::tk_rightangle;
+                    ++ix;
+                }
+                break;
+            }
+            case '(':
+                tk_type = tokentype::tk_leftparen;
+                ++ix;
+                break;
+            case ')':
+                tk_type = tokentype::tk_rightparen;
+                ++ix;
+                break;
+            case '[':
+                tk_type = tokentype::tk_leftbracket;
+                ++ix;
+                break;
+            case ']':
+                tk_type = tokentype::tk_rightbracket;
+                ++ix;
+                break;
+            case '{':
+                tk_type = tokentype::tk_leftbrace;
+                ++ix;
+                break;
+            case '}':
+                tk_type = tokentype::tk_rightbrace;
+                ++ix;
+                break;
+            case ',':
+                tk_type = tokentype::tk_comma;
+                ++ix;
+                break;
+            case ';':
+                tk_type = tokentype::tk_semicolon;
+                ++ix;
+                break;
+            case ':':
+            {
+                log && log("colon or assignment token");
+
+                if (*(ix + 1) == '=') {
+                    tk_type = tokentype::tk_assign;
+                    ++ix;
+                    ++ix;
+                } else {
+                     tk_type = tokentype::tk_colon;
+                     ++ix;
+                }
+                break;
+            }
+            default:
+                break;
+            }
+
+            if (tk_type == tokentype::tk_invalid) {
+                return result_type::make_error_consume_current_line
+                    (__FUNCTION__ /*src_function*/,
+                     "illegal input character",
+                     (ix - tk_start),
+                     *p_input_state);
+            }
+
+            if ((tk_type == tokentype::tk_i64)
+                || (tk_type == tokentype::tk_f64)
+                || (tk_type == tokentype::tk_symbol))
+            {
+                /* note: capturing token text here;
+                 *       for numeric literals will re-parse in token::i64_value() / token::f64_value()
+                 */
+                tk_text = std::string(tk_start, tk_end);
+            } else if (tk_type == tokentype::tk_string) {
+                ; /* nothing to do here -- desired tk_text already constructed */
+            }
+
+            if (tk_type == tokentype::tk_symbol) {
+                /* check for keywords */
+
+                bool keep_text = false;
+
+                if ((tk_text == "true") || (tk_text == "false")) {
+                    tk_type = tokentype::tk_bool;
+                    keep_text = true;
+                } else if (tk_text == "type") {
+                    tk_type = tokentype::tk_type;
+                } else if (tk_text == "def") {
+                    tk_type = tokentype::tk_def;
+                } else if (tk_text == "lambda") {
+                    tk_type = tokentype::tk_lambda;
+                } else if (tk_text == "if") {
+                    tk_type = tokentype::tk_if;
+                } else if (tk_text == "then") {
+                    tk_type = tokentype::tk_then;
+                } else if (tk_text == "else") {
+                    tk_type = tokentype::tk_else;
+                } else if (tk_text == "let") {
+                    tk_type = tokentype::tk_let;
+                } else if (tk_text == "in") {
+                    tk_type = tokentype::tk_in;
+                } else if (tk_text == "end") {
+                    tk_type = tokentype::tk_end;
+                } else {
+                    /* keep as symbol */
+                    keep_text = true;
+                }
+
+                if (!keep_text)
+                    tk_text.clear();
+            }
+
+            /* input.prefix(0):
+             * require caller preserves current input line until it's entirely exhausted
+             */
+            return result_type(token_type(tk_type, std::move(tk_text)),
+                               p_input_state->current_line().prefix(0));
+        } /*assemble_token*/
+
+        auto
+        Tokenizer::assemble_final_token(const span_type & token_text,
+                                        input_state_type * p_input_state) -> result_type
+        {
+            /* same as assemble_token(), with no whitespace preceding @p token_text */
+            constexpr int c_initial_whitespace = 0;
+            return assemble_token(c_initial_whitespace, token_text, p_input_state);
+        }
+
+        auto
+        Tokenizer::scan(const span_type & input,
+                        bool eof_flag) -> result_type
+        {
+            scope log(XO_DEBUG(input_state_.debug_flag()));
+
+            log && log(xtag("input", input));
+
+            /* Scan one token from @p input (or report whitespace / an error).
+             *
+             * - always at beginning of token when scan() invoked
+             * - scan will not report any portion of line as consumed until it has
+             *   emitted all tokens in that line.
+             *   rationale: caller may discard storage that scan() reports as
+             *   consumed, but holds the line until all its tokens have been read.
+             * - so caller typically calls scan() with the same input span repeatedly
+             */
+
+            /* automagically no-ops when the same input presented twice */
+            this->input_state_.capture_current_line(input, eof_flag);
+
+            const CharT * ix = this->input_state_.skip_leading_whitespace();
+
+            if(ix == input.hi()) {
+                log && log("end input -> consume current line");
+
+                /* entirety of current line has been tokenized
+                 *  -> caller may consume it
+                 */
+                return result_type::make_whitespace(this->input_state_.consume_current_line());
+            }
+
+            /* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */
+
+            // TODO:
+            // 1. hoist complete_flag up here
+            // 2. use in each branch
+            // 3. common check for prefix-capturing after if-cascade below done
+
+            /* here: *ix is not whitespace */
+
+            auto whitespace_z = input_state_.whitespace();
+
+            log && log(xtag("whitespace_z", whitespace_z));
+
+            /* tk_start points to known beginning of token
+             * (after any whitespace)
+             *
+             * goal is to leave ix pointing to 1 char past the end of the token
+             */
+            const CharT * tk_start = ix;
+
+            if (is_1char_punctuation(*ix)) {
+                /* 1-character token */
+                ++ix;
+            } else if (is_2char_punctuation(*ix)) {
+                CharT ch1 = *ix;
+
+                (void)ch1;
+
+                ++ix;
+
+#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit
+                if (ix == input.hi()) {
+                    /* need more input to know if/when token complete */
+                    this->prefix_ += std::string(tk_start, input.hi());
+
+                    log && log(xtag("captured-prefix1", this->prefix_));
+                } else
+#endif
+                    {
+                    const bool at_end = (ix == input.hi());  /* never read past end of input */
+                    const CharT ch2 = at_end ? CharT() : *ix;
+                    if (at_end || ((ch2 >= '0') && (ch2 <= '9'))
+                        || ((ch2 >= 'A') && (ch2 <= 'Z'))
+                        || ((ch2 >= 'a') && (ch2 <= 'z')))
+                    {
+                        /* end of input, or alphanumeric follows: treat as 1 char punctuation */
+                        ;
+                    } else {
+                        /* include next char */
+                        ++ix;
+                    }
+                }
+            } else if (*ix == '"') {
+                bool complete_flag = false;
+
+                /* 1. embedded space/tab allowed in string literal.
+                 * 2. embedded newline/cr not allowed.
+                 */
+                CharT prev_ch = '"';  /* holds '\\' only while an escape is pending */
+
+                ++ix;
+
+                for (; ix != input.hi(); ++ix) {
+                    /* looking for unescaped " char to end literal */
+                    if (*ix == '"') {
+                        if (prev_ch != '\\') {
+                            ++ix;  /* include terminating " for assemble_token */
+                            complete_flag = true;
+                            break;
+                        }
+                    } else if ((*ix == '\n') || (*ix == '\r')) {
+                        log && log ("string literal with naked newline or CR");
+
+                        return result_type::make_error_consume_current_line
+                            (__FUNCTION__ /*src_function*/,
+                             "must use \\n or \\r to encode newline/cr in string literal",
+                             (ix - tk_start),
+                             this->input_state_);
+                    }
+                    /* an escaped backslash is data: it must not escape the char that follows */
+                    prev_ch = ((prev_ch == '\\') && (*ix == '\\')) ? CharT('\0') : *ix;
+                }
+
+                if (!complete_flag) {
+                    log && log("unterminated string literal");
+
+                    return result_type::make_error_consume_current_line
+                               (__FUNCTION__ /*src_function*/,
+                                "unterminated string literal",
+                                (ix - tk_start),
+                                this->input_state_);
+                }
+            } else {
+                /* ix is start of some token */
+
+                if (*ix == '-') {
+                    /* this section load-bearing for input '->' scanning from beginning of token */
+                    ++ix;
+
+                    if (ix == input.hi()) {
+                        /* need more input to know if/when token complete -- see captured-prefix5 below */
+                    } else {
+                        CharT ch2 = *ix;
+
+                        if (ch2 == '>') {
+                            /* include next char and complete token */
+                            ++ix;
+
+                            log && log("complete '->' token");
+
+                            this->input_state_.advance_until(ix);
+
+                            return assemble_token(whitespace_z,
+                                                  span_type(tk_start, ix) /*token*/,
+                                                  &(this->input_state_));
+                        }
+
+                        /* here: -123, -.5e-21 for example */
+                    }
+                } else if (*ix == '>') {
+                    /* this section load-bearing for input '>=' scanning from beginning of token.
+                     * Need this because '>' necessarily excluded from is_1char_punctuation()
+                     */
+                    ++ix;
+
+                    if (ix == input.hi()) {
+                        /* need more input to know if/when token complete -- see captured-prefix5 below */
+                    } else {
+                        CharT ch2 = *ix;
+
+                        if (ch2 != '=') {
+                            log && log("complete '>' token");
+
+                            this->input_state_.advance_until(ix);
+
+                            /* ignore next char and complete token */
+                            return assemble_token(whitespace_z,
+                                                  span_type(tk_start, ix) /*token*/,
+                                                  &(this->input_state_));
+                        }
+
+                        /* here: >= for example */
+                    }
+                }
+
+                /* scan until:
+                 * - whitespace
+                 * - punctuation
+                 */
+                for (; ix != input.hi(); ++ix) {
+                    if (input_state_type::is_whitespace(*ix)
+                        || is_1char_punctuation(*ix)
+                        || is_2char_punctuation(*ix))
+                    {
+                        break;
+                    }
+
+                    /* this section load-bearing for input '>' after beginning of a token, e.g. p> */
+                    if ((ix > tk_start) && (*ix == '>'))
+                        break;
+
+                    /* this section load-bearing for input '->' at the end of another token, e.g. p->q */
+                    if (*ix == '-') {
+                        if (ix + 1 == input.hi()) {
+                            /* need more input to know if/when token complete
+                             *
+                             *   apple-banana   parses as: {tk_symbol: apple-banana}
+                             *   apple->        parses as: {tk_symbol: apple} {tk_yields}
+                             *   apple-         illegal (may not end symbol with '-')
+                             */
+                            break;
+                        }
+
+                        if (*(ix + 1) == '>') {
+                            /* treat '->' as punctuation;  complete preceding token */
+                            break;
+                        }
+                    }
+                }
+            }
+
+            log && log("assemble token z", xtag("token_z", ix - tk_start));
+
+            assert(tk_start < ix);
+
+            this->input_state_.advance_until(ix);
+
+            return assemble_token(whitespace_z,
+                                  span_type(tk_start, ix) /*token*/,
+                                  &(this->input_state_));
+        } /*scan*/
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end Tokenizer.cpp */
diff --git a/src/tokenizer2/TokenizerError.cpp b/src/tokenizer2/TokenizerError.cpp
new file mode 100644
index 00000000..ffe3c8b4
--- /dev/null
+++ b/src/tokenizer2/TokenizerError.cpp
@@ -0,0 +1,60 @@
+/** @file TokenizerError.cpp
+ *
+ *  @author Roland Conybeare, Jun 2025
+ **/
+
+#include "TokenizerError.hpp"
+
+namespace xo {
+    namespace scm {
+
+        void
+        TokenizerError::print(std::ostream & os) const
+        {
+            /* one-line <tokenizer-error ...> rendering: origin, message, input line, position */
+            os << "<tokenizer-error";
+            os << xtag("src-function", src_function_) << xtag("message", error_description_);
+            os << xtag("input", input_state_.current_line());
+            os << xtag("whitespace", input_state_.whitespace());
+            os << xtag("error-pos", error_pos_);
+            os << ">";
+        }
+
+        void
+        TokenizerError::report(std::ostream & os) const
+        {
+            using namespace std;
+
+            /* Human-readable report: column summary, the input line, a marker row
+             * pointing at the failure, then the message. Silent when no message recorded.
+             */
+            if (error_description_.empty())
+                return;
+
+            const char * prefix = "input: ";
+            /* tk_start:   position of first character of the token within the line
+             * error_pos_: offset (relative to tk_start) at which failure was detected
+             */
+            const size_t tk_start = input_state_.tk_start();
+            const size_t tk_indent = (strlen(prefix) + tk_start);
+            const size_t error_pos = 1 + tk_start + error_pos_;
+            const auto & line = input_state_.current_line();
+
+            os << "token col: " << tk_start << ", error col: " << error_pos << "\n";
+            os << prefix;
+            for (const char * p = line.lo(); p < line.hi(); ++p)
+                os << *p;
+            /* marker row must start on a fresh output row: supply a newline when the
+             * input line does not end with one of its own (e.g. empty or final line)
+             */
+            if ((line.lo() == line.hi()) || (*(line.hi() - 1) != '\n'))
+                os << '\n';
+
+            os << std::setw(tk_indent) << " " << std::string(error_pos_, '_') << '^' << endl;
+            os << error_description_ << endl;
+        }
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TokenizerError.cpp */
diff --git a/src/tokenizer2/scan_result.cpp b/src/tokenizer2/scan_result.cpp
new file mode 100644
index 00000000..05c5c0f7
--- /dev/null
+++ b/src/tokenizer2/scan_result.cpp
@@ -0,0 +1,43 @@
+/** @file scan_result.cpp
+ *
+ *  @author Roland Conybeare, 2025
+ **/
+
+#include "scan_result.hpp"
+
+namespace xo {
+    namespace scm {
+        scan_result
+        scan_result::make_whitespace(const span_type& whitespace_input)
+        { /* result carries no token (token_type::invalid()); whole span counts as consumed */
+            return scan_result(token_type::invalid(), whitespace_input /*consumed*/);
+        }
+
+        scan_result
+        scan_result::make_partial(const span_type& prefix_input)
+        { /* result carries no token (token_type::invalid()); @p prefix_input counts as consumed */
+            return scan_result(token_type::invalid(), prefix_input /*consumed*/);
+        }
+
+        scan_result
+        scan_result::make_error_consume_current_line(const char * error_src,
+                                                     std::string error_msg,
+                                                     size_t error_pos,
+                                                     input_state_type & input_state_ref)
+        {
+            /* Build an error result that also consumes the entire current input line.
+             *
+             * Snapshot the input state first: .consume_current_line() alters
+             * input_state_ref, and the error carries the state as it was on failure.
+             */
+            input_state_type state_at_error = input_state_ref;
+            const auto consumed_line = input_state_ref.consume_current_line();
+
+            return scan_result(token_type::invalid(),
+                               consumed_line,
+                               error_type(error_src, error_msg, state_at_error, error_pos));
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end scan_result.cpp */
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
new file mode 100644
index 00000000..33d683de
--- /dev/null
+++ b/src/tokenizer2/tokentype.cpp
@@ -0,0 +1,74 @@
+/* file tokentype.cpp
+ *
+ * author: Roland Conybeare
+ */
+
+#include "tokentype.hpp"
+
+namespace xo {
+    namespace scm {
+        char const *
+        tokentype_descr(tokentype tk_type)
+        {
+#define CASE(x) case tokentype::x: return STRINGIFY(x)
+            /* each CASE(x) arm returns STRINGIFY(x) -- presumably the enumerator's name as text */
+            switch(tk_type) {
+                CASE(tk_bool);
+                CASE(tk_i64);
+                CASE(tk_f64);
+                CASE(tk_string);
+                CASE(tk_symbol);
+                CASE(tk_leftparen);
+
+                CASE(tk_rightparen);
+                CASE(tk_leftbracket);
+                CASE(tk_rightbracket);
+                CASE(tk_leftbrace);
+                CASE(tk_rightbrace);
+
+                CASE(tk_leftangle);
+                CASE(tk_rightangle);
+                CASE(tk_lessequal);
+                CASE(tk_greatequal);
+                CASE(tk_dot);
+                CASE(tk_comma);
+                CASE(tk_colon);
+
+                CASE(tk_doublecolon);
+                CASE(tk_semicolon);
+                CASE(tk_singleassign);
+                CASE(tk_assign);
+                CASE(tk_yields);
+
+                CASE(tk_plus);
+                CASE(tk_minus);
+                CASE(tk_star);
+                CASE(tk_slash);
+
+                CASE(tk_cmpeq);
+                CASE(tk_cmpne);
+
+                CASE(tk_type);
+                CASE(tk_def);
+                CASE(tk_lambda);
+                CASE(tk_if);
+                CASE(tk_then);
+                CASE(tk_else);
+                CASE(tk_let);
+
+                CASE(tk_in);
+                CASE(tk_end);
+                /* tk_invalid and n_tokentype share one placeholder description */
+            case tokentype::tk_invalid:
+            case tokentype::n_tokentype:
+                return "?tokentype";
+            }
+
+#undef CASE
+            /* fallback when tk_type matches none of the cases above */
+            return "???";
+        } /*tokentype_descr*/
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end tokentype.cpp */

From 1575f8a14736c9f5e1d42b5a82cee360c4d88ce0 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sun, 11 Jan 2026 18:42:08 -0500
Subject: [PATCH 02/33] xo-tokenizer2: use xo-arena DCircularBuffer to buffer
 input line

---
 cmake/xo_tokenizer2Config.cmake.in       |   5 +-
 example/tokenrepl/tokenrepl.cpp          |  66 ++---
 include/xo/tokenizer2/TkInputState.hpp   |   6 +-
 include/xo/tokenizer2/Tokenizer.hpp      |  32 ++-
 include/xo/tokenizer2/TokenizerError.hpp |   2 +-
 include/xo/tokenizer2/scan_result.hpp    |   2 +-
 include/xo/tokenizer2/span.hpp           | 291 -----------------------
 src/tokenizer2/CMakeLists.txt            |   2 +
 src/tokenizer2/TkInputState.cpp          |   3 +-
 src/tokenizer2/Tokenizer.cpp             |  56 ++++-
 10 files changed, 106 insertions(+), 359 deletions(-)
 delete mode 100644 include/xo/tokenizer2/span.hpp

diff --git a/cmake/xo_tokenizer2Config.cmake.in b/cmake/xo_tokenizer2Config.cmake.in
index b5c3cd5c..13f1dac1 100644
--- a/cmake/xo_tokenizer2Config.cmake.in
+++ b/cmake/xo_tokenizer2Config.cmake.in
@@ -4,9 +4,10 @@ include(CMakeFindDependencyMacro)
 
 # note: changes to find_dependency() calls here
 #       must coordinate with xo_dependency() calls
-#       in CMakeLists.txt
+#       in src/tokenizer2/CMakeLists.txt
 #
-#find_dependency(xo_flatstring)
+find_dependency(xo_arena)
+find_dependency(indentlog)
 
 include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
 check_required_components("@PROJECT_NAME@")
diff --git a/example/tokenrepl/tokenrepl.cpp b/example/tokenrepl/tokenrepl.cpp
index f97b9cd0..0852f028 100644
--- a/example/tokenrepl/tokenrepl.cpp
+++ b/example/tokenrepl/tokenrepl.cpp
@@ -3,7 +3,7 @@
 #include <xo/tokenizer2/Tokenizer.hpp>
 #include <xo/tokenizer2/Token.hpp>
 #include <xo/tokenizer2/tokentype.hpp>
-#include <xo/tokenizer2/span.hpp>
+#include <xo/arena/span.hpp>
 #include <xo/indentlog/log_config.hpp>
 #include <replxx.hxx>
 #include <iostream>
@@ -14,7 +14,7 @@
 bool replxx_getline(bool interactive,
                     std::size_t parser_stack_size,
                     replxx::Replxx & rx,
-                    std::string& input)
+                    const char ** p_input)
 {
     using namespace std;
 
@@ -34,40 +34,23 @@ bool replxx_getline(bool interactive,
     if (retval) {
         //cerr << "got reval->true" << endl;
 
-        input = input_cstr;
+        *p_input = input_cstr;
 
     } else {
         //cerr << "got retval->false" << endl;
     }
 
-    rx.history_add(input);
-
-    // we want tokenizer to see newline, it's syntax
-    input.push_back('\n');
+    rx.history_add(input_cstr);
 
     return retval;
 }
 
-#ifdef OBSOLETE
-bool repl_getline(bool interactive,
-                  std::istream & in,
-                  std::ostream & out,
-                  std::string & input)
-{
-    if (interactive) {
-        out << "> ";
-        std::flush(out);
-    }
-
-    return static_cast<bool>(std::getline(in, input));
-}
-#endif
-
 int
 main() {
     using xo::scm::Tokenizer;
-    using xo::scm::span;
     using xo::scm::operator<<;
+    using xo::mm::CircularBufferConfig;
+    using xo::mm::span;
     using replxx::Replxx;
 
     using namespace std;
@@ -82,36 +65,39 @@ main() {
     rx.set_max_history_size(1000);
     rx.history_load("repl_history.txt");
 
-    Tokenizer tkz(xo::log_config::min_log_level <= xo::log_level::info);
+    Tokenizer tkz(CircularBufferConfig{.name_ = "tokenrepl-input",
+                                       .max_capacity_ = 4*1024,
+                                       .max_captured_span_ = 128},
+                  true /*debug_flag*/);
 
-    string input_str;
+    const char * input_cstr = nullptr;;
 
     size_t line_no = 1;
 
     constexpr std::size_t c_maxlines = 25;
 
-    while (
-        //repl_getline(interactive, cin, cout, input_str)  // once upon a time
-        replxx_getline(interactive, 0 /*parser_stack_size*/, rx, input_str))
+    while (replxx_getline(interactive, 0 /*parser_stack_size*/, rx, &input_cstr))
     {
-        span_type input = span_type::from_string(input_str);
-
         //cout << "input: " << input << endl;
 
         // reminder: input may contain multiple tokens
-        while (!input.empty()) {
-            auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
+        if (input_cstr && *input_cstr) {
+            auto [error, input] = tkz.buffer_input_line(input_cstr, false /*!eof*/);
 
-            if (tk.is_valid()) {
-                cout << tk << endl;
-            } else if (error.is_error()) {
-                cout << "tokenizer error: " << endl;
-                error.report(cout);
+            {
+                auto [tk, consumed, error] = tkz.scan(input);
 
-                break;
+                if (tk.is_valid()) {
+                    cout << tk << endl;
+                } else if (error.is_error()) {
+                    cout << "tokenizer error: " << endl;
+                    error.report(cout);
+
+                    break;
+                }
+
+                input = input.after_prefix(consumed);
             }
-
-            input = input.after_prefix(consumed);
         }
 
         /* here: input.empty() or error encountered */
diff --git a/include/xo/tokenizer2/TkInputState.hpp b/include/xo/tokenizer2/TkInputState.hpp
index 531585a1..ea315a0a 100644
--- a/include/xo/tokenizer2/TkInputState.hpp
+++ b/include/xo/tokenizer2/TkInputState.hpp
@@ -63,7 +63,7 @@ namespace xo {
             using CharT = char;
 
             /** type representing a contiguous span of tokenizer input characters **/
-            using span_type = span<const CharT>;
+            using span_type = xo::mm::span<const CharT>;
 
             ///@}
 
@@ -76,7 +76,7 @@ namespace xo {
             /** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
              *  Introduced for unit tests, not used in tokenizer.
              **/
-            explicit TkInputState(const span<const CharT>& current_line,
+            explicit TkInputState(const span_type & current_line,
                                   size_t current_pos,
                                   size_t whitespace) : current_line_{current_line},
                                                        current_pos_{current_pos},
@@ -191,7 +191,7 @@ namespace xo {
             ///@{
 
             /** remember current input line.  Used only to report errors **/
-            span<const CharT> current_line_ = span<const CharT>();
+            span_type current_line_ = span_type();
             /** start of last token within @ref current_line_ **/
             size_t tk_start_ = 0;
             /** input position within @ref current_line_ **/
diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
index 99005fee..40a98cd9 100644
--- a/include/xo/tokenizer2/Tokenizer.hpp
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -9,8 +9,9 @@
 #include "TkInputState.hpp"
 #include "span.hpp"
 #include "scan_result.hpp"
-#include "xo/indentlog/scope.hpp"
-#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <xo/arena/DCircularBuffer.hpp>
+#include <xo/indentlog/scope.hpp>
+#include <xo/indentlog/print/ppdetail_atomic.hpp>
 #include <cassert>
 
 namespace xo {
@@ -58,15 +59,24 @@ namespace xo {
             using CharT = char;
             using token_type = Token;
             using error_type = TokenizerError;
-            using span_type = span<const CharT>;
-            using input_state_type = TkInputState;
+            using DCircularBuffer = xo::mm::DCircularBuffer;
+            using CircularBufferConfig = xo::mm::CircularBufferConfig;
+            using span_type = xo::mm::span<const CharT>;
+            //using input_state_type = TkInputState;
             using result_type = scan_result;
 
         public:
             /** @defgroup tokenizer-ctors tokenizer constructors **/
             ///@{
 
-            Tokenizer(bool debug_flag = false);
+            /**
+             *  @p config     gives configuration for circular input buffer
+             *  @p debug_flag enables tokenizer debug output
+             **/
+            Tokenizer(const CircularBufferConfig & config = CircularBufferConfig{.name_ = "tkz-input",
+                                                                                 .max_capacity_ = 4*1024,
+                                                                                 .max_captured_span_ = 128},
+                      bool debug_flag = false);
 
             ///@}
 
@@ -119,6 +129,11 @@ namespace xo {
              **/
             bool has_prefix() const { return !prefix_.empty(); }
 
+            /** buffer contents of input_cstr.
+             *  May throw if buffer space exhausted
+             **/
+            std::pair<input_error, span_type> buffer_input_line(const char * input_cstr, bool eof_flag);
+
             /** scan for next input token,  given @p input.
              *  Note:
              *  - tokenizer can consume input (e.g. whitespace)
@@ -130,8 +145,7 @@ namespace xo {
              *
              *  @return {parsed token, consumed span}
              **/
-            scan_result scan(const span_type & input,
-                             bool eof_flag);
+            scan_result scan(const span_type & input);
 
             /** discard current line after error.  Just cleans up error-reporting state **/
             void discard_current_line();
@@ -142,6 +156,8 @@ namespace xo {
             /** @defgroup tokenizer-instance-vars tokenizer instance variables **/
             ///@{
 
+            /** Buffer input here. vm-aware. uses mmap directly **/
+            DCircularBuffer input_buffer_;
             /** track input state (line#,pos,..) for error messages.
              *  There's an ordering problem here:
              *  1. input_state_.skip_leading_whitespace() advances
@@ -150,7 +166,7 @@ namespace xo {
              *  3. but neeed newline to end token
              *  Also recall input_state_type needed for reporting errors.
              **/
-            input_state_type input_state_;
+            TkInputState input_state_;
             /** Accumulate partial token here.
              *  This will happen if input sent to @ref tokenizer::scan
              *  ends without whitespace such that last available token's
diff --git a/include/xo/tokenizer2/TokenizerError.hpp b/include/xo/tokenizer2/TokenizerError.hpp
index a7fab3c2..a1cb99ee 100644
--- a/include/xo/tokenizer2/TokenizerError.hpp
+++ b/include/xo/tokenizer2/TokenizerError.hpp
@@ -20,7 +20,7 @@ namespace xo {
         class TokenizerError {
         public:
             using CharT = char;
-            using span_type = span<const CharT>;
+            using span_type = xo::mm::span<const CharT>;
 
         public:
             /** @defgroup tokenizer-error-ctors **/
diff --git a/include/xo/tokenizer2/scan_result.hpp b/include/xo/tokenizer2/scan_result.hpp
index 971e4b93..249154f1 100644
--- a/include/xo/tokenizer2/scan_result.hpp
+++ b/include/xo/tokenizer2/scan_result.hpp
@@ -30,7 +30,7 @@ namespace xo {
         public:
             using CharT = char;
             using token_type = Token;
-            using span_type = span<const CharT>;
+            using span_type = xo::mm::span<const CharT>;
             using error_type = TokenizerError;
             using input_state_type = TkInputState;
 
diff --git a/include/xo/tokenizer2/span.hpp b/include/xo/tokenizer2/span.hpp
deleted file mode 100644
index 8cf7a4a7..00000000
--- a/include/xo/tokenizer2/span.hpp
+++ /dev/null
@@ -1,291 +0,0 @@
-/** @file span.hpp **/
-
-#pragma once
-
-#include "xo/indentlog/scope.hpp"
-#include "xo/indentlog/print/ppdetail_atomic.hpp"
-#include <ostream>
-#include <cstdint>
-#include <cassert>
-
-namespace xo {
-    namespace scm {
-        /** @class span compression/span.hpp
-         *
-         *  @brief A contiguous range of characters,  without ownership.
-         *
-         *  @tparam CharT type for elements referred to by this span.
-         **/
-        template <typename CharT>
-        class span {
-        public:
-            /** @defgroup span-type-traits span type traits **/
-            ///@{
-
-            /** typealias for span size (in units of CharT) **/
-            using size_type = std::uint64_t;
-
-            ///@}
-
-        public:
-            /** @defgroup span-ctors span constructors **/
-            ///@{
-
-            /** null span **/
-            span() : lo_{nullptr}, hi_{nullptr} {}
-
-            /** Create span for the contiguous memory range [@p lo, @p hi) **/
-            span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}
-
-            /** explicit conversion from span<U> **/
-            template<typename CharU>
-            span(const span<CharU> & other,
-                 std::enable_if_t<std::is_convertible_v<CharU*, CharT*>
-                 && !std::is_same_v<CharU, CharT>> * = nullptr)
-                : lo_{other.lo()}, hi_{other.hi()} {}
-
-            /** copy ctor (explicit to avoid ambiguity with template ctor) **/
-            span(const span & other) = default;
-            span & operator=(const span & other) = default;
-
-            /** Create a null span (i.e. with null @p lo, @p hi pointers)
-             *  A null span can be concatenated with any other span
-             *  without triggering matching-endpoint asserts.
-             **/
-            static span make_null() { return span(static_cast<CharT*>(nullptr), static_cast<CharT*>(nullptr)); }
-
-            /** @brief create span for C-style string @p cstr **/
-            static span from_cstr(const CharT * cstr) {
-                CharT * lo = cstr;
-                CharT * hi = cstr ? cstr + strlen(cstr) : nullptr;
-
-                return span(lo, hi);
-            }
-
-            /** @brief create span from std::string @p str **/
-            static span from_string(const std::string& str) {
-                CharT * lo = &(*str.begin());
-                CharT * hi = &(*str.end());
-
-                return span(lo, hi);
-            }
-
-            /** @brief concatenate two contiguous spans */
-            static span concat(const span & span1, const span & span2) {
-                if (span1.is_null())
-                    return span2;
-                if (span2.is_null())
-                    return span1;
-
-                if (span1.hi() != span2.lo()) {
-                    scope log(XO_DEBUG(true));
-
-                    log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
-                }
-
-                assert(span1.hi() == span2.lo());
-
-                CharT * lo = span1.lo();
-                CharT * hi = span2.hi();
-
-                return span(lo, hi);
-            }
-
-            ///@}
-
-            /** @defgroup span-access-methods **/
-            ///@{
-
-            CharT * lo() const { return lo_; } /* get member span::lo_ */
-            CharT * hi() const { return hi_; } /* get member span::hi_ */
-
-            ///@}
-
-            /** @defgroup span-general-methods **/
-            ///@{
-
-            /** @brief strip prefix until first occurence of '\n', including the newline **/
-            void discard_until_newline() {
-                for (const CharT * p = lo_; p < hi_; ++p) {
-                    if (*p == '\n') {
-                        lo_ = p + 1;
-                        return;
-                    }
-                }
-
-                lo_ = hi_;
-            }
-
-            /** Create new span over supplied type,
-             *  with identical (possibly misaligned) endpoints.
-             *
-             *  @warning
-             *  1. New span uses exactly the same memory addresses.
-             *     Endpoint pointers may not be aligned.
-             *  2. Implementation assumes code compiled with
-             *     @code -fno-strict-aliasing @endcode enabled.
-             *
-             *  @tparam OtherT element type for new span
-             **/
-            template <typename OtherT>
-            span<OtherT>
-            cast() const { return span<OtherT>(reinterpret_cast<OtherT *>(lo_),
-                                               reinterpret_cast<OtherT *>(hi_)); }
-
-            /** @brief create span including the first @p z members of this span. **/
-            span prefix(size_type z) const { return span(lo_, lo_ + z); }
-
-            /** @brief create span representing prefix up to (but not including) @p *p
-             **/
-            span prefix_upto(CharT * p) const {
-                if (p <= hi_)
-                    return span(lo_, p);
-                else
-                    return span(lo_, hi_);
-            }
-
-            /** @brief create span with first @p z members of this span removed **/
-            span after_prefix(size_type z) const {
-                if (lo_ + z > hi_)
-                    z = hi_ - lo_;
-
-                return span(lo_ + z, hi_);
-            }
-
-            /** @brief create span with @p prefix of this span removed **/
-            span after_prefix(const span & prefix) const {
-                if (!prefix.is_null() && (prefix.lo() != lo_)) {
-                    throw std::runtime_error
-                        ("after_prefix: expected prefix of this span");
-                }
-
-                return after_prefix(prefix.size());
-            }
-
-            /** Create span starting with position @p p.
-             *  Does boundary checking; will return empty span if @p p is outside @c [lo_,hi)
-             **/
-            span suffix_from(CharT * p) const {
-                if ((lo_ <= p) && (p <= hi_))
-                    return span(p, hi_);
-                else
-                    return span(hi_, hi_);
-            }
-
-            /** true iff this span is null.  distinct from empty. **/
-            bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
-            /** true iff this span is empty (comprises 0 elements). **/
-            bool empty() const { return lo_ == hi_; }
-            /** report the number of elements (of type CharT) in this span. **/
-            size_type size() const { return hi_ - lo_; }
-
-            /** increase extent of this spans to include @p x.
-             *  Requires @c hi() == @c x.lo()
-             **/
-            span & operator+=(const span & x) {
-                if (hi_ == x.lo_) {
-                    hi_ = x.hi_;
-                } else if (!x.is_null()) {
-                    assert(false);
-                }
-
-                return *this;
-            }
-
-            /** print representation for this span on stream @p os **/
-            void print(std::ostream & os) const {
-                os << "<span"
-                   << xtag("addr", (void*)lo_)
-                   << xtag("size", size())
-                   << " :text " << xo::print::quot(std::string_view(lo_, hi_))
-                   << ">";
-            }
-            ///@}
-
-        private:
-            /** @defgroup span-instance-vars **/
-            ///@{
-
-            /** start of span.
-                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
-            **/
-            CharT * lo_ = nullptr;
-
-            /** @brief end of span.
-                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
-            **/
-            CharT * hi_ = nullptr;
-
-            ///@}
-        }; /*span*/
-
-        /** @defgroup span-operators **/
-        ///@{
-
-        /** compare spans for equality.
-         *  Two spans are equal iff both endpoints match exactly.
-         **/
-        template <typename CharT>
-        inline bool
-        operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
-            return ((lhs.lo() == rhs.lo())
-                    && (lhs.hi() == rhs.hi()));
-        }
-
-        /** compare spans for inequality.
-         *  Two spans are unequal if either paired endpoint differs.
-         **/
-        template <typename CharT>
-        inline bool
-        operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
-            return ((lhs.lo() != rhs.lo())
-                    || (lhs.hi() != rhs.hi()));
-        }
-
-        /** print a summary of @p x on stream @p os. Intended for diagnostics **/
-        template <typename CharT>
-        inline std::ostream &
-        operator<<(std::ostream & os,
-                   const span<CharT> & x) {
-            x.print(os);
-            return os;
-        }
-
-        ///@}
-    } /*namespace scm*/
-
-    namespace print {
-        template <typename CharT>
-        class printspan_impl {
-        public:
-            printspan_impl(xo::scm::span<CharT> x) : span_{x} {}
-
-            xo::scm::span<CharT> span_;
-        };
-
-        template <typename CharT>
-        printspan_impl<CharT> printspan(const xo::scm::span<CharT>& span) {
-            return printspan_impl<CharT>(span);
-        }
-
-        template <typename CharT>
-        inline std::ostream &
-        operator<< (std::ostream & os,
-                    const printspan_impl<CharT> & x)
-        {
-            for (const CharT * p = x.span_.lo(); p < x.span_.hi(); ++p)
-                os << *p;
-
-            return os;
-        }
-
-#ifndef ppdetail_atomic
-        template <typename CharT>        \
-        PPDETAIL_ATOMIC_BODY(printspan_impl<CharT>);
-
-        template <typename CharT>        \
-        PPDETAIL_ATOMIC_BODY(xo::scm::span<CharT>);
-#endif
-
-    }
-} /*namespace xo*/
diff --git a/src/tokenizer2/CMakeLists.txt b/src/tokenizer2/CMakeLists.txt
index 967535e2..ccf1b551 100644
--- a/src/tokenizer2/CMakeLists.txt
+++ b/src/tokenizer2/CMakeLists.txt
@@ -10,6 +10,8 @@ set(SELF_SRCS
     tokentype.cpp)
 
 xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
+# deps must coordinate with xo-tokenizer/cmake/xo_tokenizer2Config.cmake.in
+xo_dependency(${SELF_LIB} xo_arena)
 xo_dependency(${SELF_LIB} indentlog)
 
 # end CMakeLists.txt
diff --git a/src/tokenizer2/TkInputState.cpp b/src/tokenizer2/TkInputState.cpp
index 30db1dbb..1eca02dd 100644
--- a/src/tokenizer2/TkInputState.cpp
+++ b/src/tokenizer2/TkInputState.cpp
@@ -84,7 +84,8 @@ namespace xo {
             //       for example including leading whitespace.
             //       See discussion in tokenizer scan() method
 
-            scope log(XO_DEBUG(debug_flag_));
+            scope log(XO_DEBUG(debug_flag_),
+                      xtag("input", input));
 
             /* look ahead to {end of line, end of input}, whichever comes first */
             const CharT * sol = input.lo();
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 00ef4eec..888a0c43 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -6,9 +6,13 @@
 #include "Tokenizer.hpp"
 
 namespace xo {
+    using std::byte;
+
     namespace scm {
-        Tokenizer::Tokenizer(bool debug_flag)
-            : input_state_{debug_flag}
+        Tokenizer::Tokenizer(const CircularBufferConfig & config,
+                             bool debug_flag)
+        : input_buffer_{DCircularBuffer::map(config)},
+          input_state_{debug_flag}
         {}
 
         void
@@ -108,7 +112,7 @@ namespace xo {
         auto
         Tokenizer::assemble_token(std::size_t initial_whitespace,
                                   const span_type & token_text,
-                                  input_state_type * p_input_state) -> result_type
+                                  TkInputState * p_input_state) -> result_type
         {
             /* literal|pretty|streamlined */
             log_config::style = function_style::streamlined;
@@ -600,7 +604,7 @@ namespace xo {
 
         auto
         Tokenizer::assemble_final_token(const span_type & token_text,
-                                        input_state_type * p_input_state) -> result_type
+                                        TkInputState * p_input_state) -> result_type
         {
             return assemble_token(0 /*initial_whitespace*/,
                                   token_text,
@@ -608,12 +612,43 @@ namespace xo {
         }
 
         auto
-        Tokenizer::scan(const span_type & input,
-                        bool eof_flag) -> result_type
+        Tokenizer::buffer_input_line(const char * input_cstr,
+                                     bool eof_flag) -> std::pair<input_error, span_type>
         {
             scope log(XO_DEBUG(input_state_.debug_flag()));
 
-            log && log(xtag("input", input));
+            log && log(xtag("input", input_cstr));
+
+            auto buf_input_0 = input_buffer_.input_range().hi();
+
+            auto remainder = input_buffer_.append
+                                 (DCircularBuffer::const_span_type
+                                      ((const byte *)input_cstr,
+                                       (const byte *)input_cstr + strlen(input_cstr)));
+
+            const char * newline_cstr = "\n";
+            auto remainder2 = input_buffer_.append
+                                  (DCircularBuffer::const_span_type
+                                       ((const byte *)newline_cstr,
+                                        (const byte *)newline_cstr + strlen(newline_cstr)));
+
+            if (!remainder.empty() || !remainder2.empty()) {
+                throw std::runtime_error(tostr("Tokenizer::buffer_line: line too long!",
+                                               xtag("remainder.size", remainder.size())));
+            }
+
+            auto buf_input_1 = input_buffer_.input_range().hi();
+
+            span_type input = span_type((const char *)buf_input_0,
+                                        (const char *)buf_input_1);
+
+            return this->input_state_.capture_current_line(input, eof_flag);
+        }
+
+        auto
+        Tokenizer::scan(const span_type & input) -> result_type
+        {
+            scope log(XO_DEBUG(input_state_.debug_flag()));
 
             /* - Always at beginning of token when scan() invoked
              * - scan will not report any portion of line as consumed until it has
@@ -625,9 +660,6 @@ namespace xo {
              *   with the same input span multiple times
              */
 
-            /* automagically no-ops when the same input presented twice */
-            this->input_state_.capture_current_line(input, eof_flag);
-
             const CharT * ix = this->input_state_.skip_leading_whitespace();
 
             if(ix == input.hi()) {
@@ -789,7 +821,7 @@ namespace xo {
                  * - punctuation
                  */
                 for (; ix != input.hi(); ++ix) {
-                    if (input_state_type::is_whitespace(*ix)
+                    if (TkInputState::is_whitespace(*ix)
                         || is_1char_punctuation(*ix)
                         || is_2char_punctuation(*ix))
                     {
@@ -829,7 +861,7 @@ namespace xo {
             return assemble_token(whitespace_z,
                                   span_type(tk_start, ix) /*token*/,
                                   &(this->input_state_));
-        } /*scan*/
+        } /*_scan_aux*/
     } /*namespace scm*/
 } /*namespace xo*/
 

From a7ed10c16a6011cc59ef0e4ce30da9bbfdfac4a0 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sun, 11 Jan 2026 19:10:42 -0500
Subject: [PATCH 03/33] xo-tokenizer: example tokenrepl restored to working
 order

Now with CBufferedInput in Tokenizer
---
 example/tokenrepl/tokenrepl.cpp | 15 ++++++++++++++-
 src/tokenizer2/Tokenizer.cpp    | 14 ++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/example/tokenrepl/tokenrepl.cpp b/example/tokenrepl/tokenrepl.cpp
index 0852f028..1cf02244 100644
--- a/example/tokenrepl/tokenrepl.cpp
+++ b/example/tokenrepl/tokenrepl.cpp
@@ -51,6 +51,8 @@ main() {
     using xo::scm::operator<<;
     using xo::mm::CircularBufferConfig;
     using xo::mm::span;
+    using xo::scope;
+    using xo::xtag;
     using replxx::Replxx;
 
     using namespace std;
@@ -65,10 +67,13 @@ main() {
     rx.set_max_history_size(1000);
     rx.history_load("repl_history.txt");
 
+    constexpr bool c_debug_flag = true;
+    scope log(XO_DEBUG(c_debug_flag));
+
     Tokenizer tkz(CircularBufferConfig{.name_ = "tokenrepl-input",
                                        .max_capacity_ = 4*1024,
                                        .max_captured_span_ = 128},
-                  true /*debug_flag*/);
+                  c_debug_flag);
 
     const char * input_cstr = nullptr;;
 
@@ -84,9 +89,17 @@ main() {
         if (input_cstr && *input_cstr) {
             auto [error, input] = tkz.buffer_input_line(input_cstr, false /*!eof*/);
 
+            if (log) {
+                log(xtag("msg", "buffered input line"));
+                log(xtag("input", input));
+            }
+
+            while (!input.empty())
             {
                 auto [tk, consumed, error] = tkz.scan(input);
 
+                log && log(xtag("consumed", consumed), xtag("tk", tk));
+
                 if (tk.is_valid()) {
                     cout << tk << endl;
                 } else if (error.is_error()) {
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 888a0c43..4fa98a97 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -622,15 +622,9 @@ namespace xo {
             auto buf_input_0 = input_buffer_.input_range().hi();
 
             auto remainder = input_buffer_.append
-                                 (DCircularBuffer::const_span_type
-                                      ((const byte *)input_cstr,
-                                       (const byte *)input_cstr + strlen(input_cstr)));
-
-            const char * newline_cstr = "\n";
+                                 (DCircularBuffer::const_span_type::from_cstr(input_cstr));
             auto remainder2 = input_buffer_.append
-                                  (DCircularBuffer::const_span_type
-                                       ((const byte *)newline_cstr,
-                                        (const byte *)newline_cstr + strlen(newline_cstr)));
+                                  (DCircularBuffer::const_span_type::from_cstr("\n"));
 
             if (!remainder.empty() || !remainder2.empty()) {
                 throw std::runtime_error(tostr("Tokenizer::buffer_line: line too long!",
@@ -639,8 +633,8 @@ namespace xo {
 
             auto buf_input_1 = input_buffer_.input_range().hi();
 
-            span_type input = span_type((const char *)buf_input_0,
-                                        (const char *)buf_input_1);
+            span_type input = span_type(buf_input_0,
+                                        buf_input_1);
 
             return this->input_state_.capture_current_line(input, eof_flag);
         }

From f25d1fb7384f60eddd416898a47f775ca4dd21bd Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sun, 18 Jan 2026 17:59:46 -0500
Subject: [PATCH 04/33] xo-reader2 scaffold (fomo+arena version of xo-reader/)
 [WIP]

---
 include/xo/tokenizer2/tokentype.hpp | 4 ++--
 src/tokenizer2/tokentype.cpp        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index eeeb7dd0..91cb3622 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -164,8 +164,8 @@ namespace xo {
             /** keyword @c 'end' **/
             tk_end,
 
-            /** counts number of entries **/
-            n_tokentype
+            /** comes last, counts number of entries **/
+            N
         }; /*tokentype*/
 
         /** String representation for enum value.
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index 33d683de..40c2dbfb 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -60,7 +60,7 @@ namespace xo {
                 CASE(tk_end);
 
             case tokentype::tk_invalid:
-            case tokentype::n_tokentype:
+            case tokentype::N:
                 return "?tokentype";
             }
 

From e3be7ed2de57f76f05643f8061c61c8fce237bf1 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Mon, 19 Jan 2026 00:39:16 -0500
Subject: [PATCH 05/33] xo-reader: + DDefineSsm + utest

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 0994e3b8..7ed490cc 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -132,7 +132,7 @@ namespace xo {
             /** token representing keyword @c type **/
             static Token type() { return Token(tokentype::tk_type); }
             /** token representing keyword @c def **/
-            static Token def() { return Token(tokentype::tk_def); }
+            static Token def_token() { return Token(tokentype::tk_def); }
             /** token representing keyword @c lambda **/
             static Token lambda() { return Token(tokentype::tk_lambda); }
             /** token representing keyword @c if **/

From 9d4b50ede4d4b94fd78309ef58cede5b5fd8e603 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Mon, 19 Jan 2026 11:33:14 -0500
Subject: [PATCH 06/33] xo-tokenizer2: cosmetic / minor

---
 src/tokenizer2/Tokenizer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 4fa98a97..7076a95d 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -595,10 +595,13 @@ namespace xo {
                     tk_text.clear();
             }
 
+            // TOOD: report tk_text as span,
+            //       but must pin / unpin
+
             /* input.prefix(0):
              * require caller preserves current input line until it's entirely exhausted
              */
-            return result_type(token_type(tk_type, std::move(tk_text)),
+            return result_type(Token(tk_type, std::move(tk_text)),
                                p_input_state->current_line().prefix(0));
         } /*assemble_token*/
 

From 7fadf9662e807c984b4a365d5a3ed6475a7edb11 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Tue, 20 Jan 2026 22:22:45 -0500
Subject: [PATCH 07/33] xo-reader2: DefineSsm handles colon token after lhs var

example:  def foo : f64 = 3.14;
---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 7ed490cc..fc448106 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -108,7 +108,7 @@ namespace xo {
             /** token representing comma @c "," **/
             static Token comma() { return Token(tokentype::tk_comma); }
             /** token representing colon @c ":" **/
-            static Token colon() { return Token(tokentype::tk_colon); }
+            static Token colon_token() { return Token(tokentype::tk_colon); }
             /** token representing double-colo @c "::" **/
             static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
             /** token representing semicolon @c ";" **/

From b738afac9deee9eb074695b38202bf8724f92489 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 21 Jan 2026 12:59:06 -0500
Subject: [PATCH 08/33] xo-reader2: scaffold on_singleassign_token() in PSM

---
 include/xo/tokenizer2/Token.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index fc448106..d47b311d 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -101,7 +101,7 @@ namespace xo {
             static Token rightbracket() { return Token(tokentype::tk_rightbracket); }
             /** token representing left brace @c "{" **/
             static Token leftbrace() { return Token(tokentype::tk_leftbrace); }
-            /** token representing right brace @c "}' **/
+            /** token representing right brace @c "}" **/
             static Token rightbrace() { return Token(tokentype::tk_rightbrace); }
             /** token representing period @c "." **/
             static Token dot() { return Token(tokentype::tk_dot); }
@@ -113,8 +113,8 @@ namespace xo {
             static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
             /** token representing semicolon @c ";" **/
             static Token semicolon() { return Token(tokentype::tk_semicolon); }
-            /** token representing single-assignment @c "=" **/
-            static Token singleassign() { return Token(tokentype::tk_singleassign); }
+            /** token representing single-assignment @c "=" (editor bait: equal_token) **/
+            static Token singleassign_token() { return Token(tokentype::tk_singleassign); }
             /** token representing unrestricted assignment @c ":=" **/
             static Token assign_token() { return Token(tokentype::tk_assign); }
             /** token representing indirection @c "->" **/

From 81dcd2eb714fb910f6438d82f91366014895dd02 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Thu, 22 Jan 2026 17:15:05 -0500
Subject: [PATCH 09/33] xo-reader2: + on_parsed_expression_with_semicolon +
 DefineSsm works

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index d47b311d..cc6e13d9 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -112,7 +112,7 @@ namespace xo {
             /** token representing double-colo @c "::" **/
             static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
             /** token representing semicolon @c ";" **/
-            static Token semicolon() { return Token(tokentype::tk_semicolon); }
+            static Token semicolon_token() { return Token(tokentype::tk_semicolon); }
             /** token representing single-assignment @c "=" (editor bait: equal_token) **/
             static Token singleassign_token() { return Token(tokentype::tk_singleassign); }
             /** token representing unrestricted assignment @c ":=" **/

From 5d8f4b4b92ceb5a0a94829ad988d393551d84c22 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Thu, 22 Jan 2026 21:03:40 -0500
Subject: [PATCH 10/33] xo-reader2: working on example parser repl

---
 include/xo/tokenizer2/TokenizerError.hpp | 2 +-
 include/xo/tokenizer2/scan_result.hpp    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/TokenizerError.hpp b/include/xo/tokenizer2/TokenizerError.hpp
index a1cb99ee..b8a50988 100644
--- a/include/xo/tokenizer2/TokenizerError.hpp
+++ b/include/xo/tokenizer2/TokenizerError.hpp
@@ -99,7 +99,7 @@ namespace xo {
             size_t error_pos_ = 0;
 
             ///@}
-        }; /*error_token*/
+        };
 
         inline std::ostream &
         operator<< (std::ostream & os,
diff --git a/include/xo/tokenizer2/scan_result.hpp b/include/xo/tokenizer2/scan_result.hpp
index 249154f1..45718c5c 100644
--- a/include/xo/tokenizer2/scan_result.hpp
+++ b/include/xo/tokenizer2/scan_result.hpp
@@ -28,9 +28,9 @@ namespace xo {
          **/
         class scan_result {
         public:
-            using CharT = char;
+            //using CharT = char;
             using token_type = Token;
-            using span_type = xo::mm::span<const CharT>;
+            using span_type = xo::mm::span<const char>;
             using error_type = TokenizerError;
             using input_state_type = TkInputState;
 

From 9044e1d196418a3189d982b8c564556ea38e106c Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 23 Jan 2026 11:54:32 -0500
Subject: [PATCH 11/33] xo-reader2: + example app 'readerreplxx'

---
 example/tokenrepl/tokenrepl.cpp          | 45 ++++++++++++------------
 include/xo/tokenizer2/Tokenizer.hpp      |  5 +--
 include/xo/tokenizer2/TokenizerError.hpp | 16 ++++++---
 src/tokenizer2/Tokenizer.cpp             | 17 +++++----
 4 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/example/tokenrepl/tokenrepl.cpp b/example/tokenrepl/tokenrepl.cpp
index 1cf02244..d8ddbd7f 100644
--- a/example/tokenrepl/tokenrepl.cpp
+++ b/example/tokenrepl/tokenrepl.cpp
@@ -85,32 +85,33 @@ main() {
     {
         //cout << "input: " << input << endl;
 
+        auto input_ext = Tokenizer::span_type::from_cstr(input_cstr);
+
         // reminder: input may contain multiple tokens
-        if (input_cstr && *input_cstr) {
-            auto [error, input] = tkz.buffer_input_line(input_cstr, false /*!eof*/);
+        auto [error, input] = tkz.buffer_input_line(input_ext, false /*!eof*/);
 
-            if (log) {
-                log(xtag("msg", "buffered input line"));
-                log(xtag("input", input));
+        if (log) {
+            log(xtag("msg", "buffered input line"));
+            log(xtag("input", input));
+        }
+
+        while (!input.empty())
+        {
+            auto [tk, consumed, error] = tkz.scan(input);
+
+            log && log(xtag("consumed", consumed), xtag("tk", tk));
+
+            if (tk.is_valid()) {
+                cout << tk << endl;
+            } else if (error.is_error()) {
+                cout << "tokenizer error: " << endl;
+
+                error.report(cout);
+
+                break;
             }
 
-            while (!input.empty())
-            {
-                auto [tk, consumed, error] = tkz.scan(input);
-
-                log && log(xtag("consumed", consumed), xtag("tk", tk));
-
-                if (tk.is_valid()) {
-                    cout << tk << endl;
-                } else if (error.is_error()) {
-                    cout << "tokenizer error: " << endl;
-                    error.report(cout);
-
-                    break;
-                }
-
-                input = input.after_prefix(consumed);
-            }
+            input = input.after_prefix(consumed);
         }
 
         /* here: input.empty() or error encountered */
diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
index 40a98cd9..69843a5a 100644
--- a/include/xo/tokenizer2/Tokenizer.hpp
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -129,10 +129,11 @@ namespace xo {
              **/
             bool has_prefix() const { return !prefix_.empty(); }
 
-            /** buffer contents of input_cstr.
+            /** copy into buffer the  contents of @p input.
              *  May throw if buffer space exhausted
              **/
-            std::pair<input_error, span_type> buffer_input_line(const char * input_cstr, bool eof_flag);
+            std::pair<input_error, span_type> buffer_input_line(span_type input,
+                                                                bool eof_flag);
 
             /** scan for next input token,  given @p input.
              *  Note:
diff --git a/include/xo/tokenizer2/TokenizerError.hpp b/include/xo/tokenizer2/TokenizerError.hpp
index b8a50988..bf7702b1 100644
--- a/include/xo/tokenizer2/TokenizerError.hpp
+++ b/include/xo/tokenizer2/TokenizerError.hpp
@@ -32,7 +32,7 @@ namespace xo {
              *  @p tk_start   current position on entry to scanner
              *  @p error_pos  error location relative to token start
              **/
-            TokenizerError(const char * src_function,
+            TokenizerError(std::string_view src_function,
                            std::string error_description,
                            const TkInputState & input_state,
                            size_t error_pos)
@@ -46,12 +46,20 @@ namespace xo {
                     log && log(xtag("input_state.current_pos", input_state.current_pos()),
                                xtag("error_pos", error_pos));
                 }
+
+            TokenizerError with_error(std::string_view error_src_fn,
+                                      std::string error_msg) {
+                return TokenizerError(error_src_fn,
+                                      std::string(error_msg),
+                                      this->input_state_,
+                                      0 /*error_pos*/);
+            }
             ///@}
 
             /** @defgroup tokenizer-error-access-methods **/
             ///@{
 
-            const char * src_function() const { return src_function_; }
+            std::string_view src_function() const { return src_function_; }
             const std::string & error_description() const { return error_description_; }
 #pragma GCC diagnostic push
 #ifndef __APPLE__
@@ -88,8 +96,8 @@ namespace xo {
             ///@{
 
             /** source location (in tokenizer) at which error identified **/
-            char const * src_function_ = nullptr;
-            /** static error description **/
+            std::string_view src_function_;
+            /** error description **/
             std::string error_description_;
             /** input state associated with this error.
              *  Sufficient to precisely locate it with context.
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 7076a95d..2784072a 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -615,19 +615,18 @@ namespace xo {
         }
 
         auto
-        Tokenizer::buffer_input_line(const char * input_cstr,
+        Tokenizer::buffer_input_line(span_type input_ext,
                                      bool eof_flag) -> std::pair<input_error, span_type>
         {
             scope log(XO_DEBUG(input_state_.debug_flag()));
 
-            log && log(xtag("input", input_cstr));
+            log && log(xtag("input_ext", input_ext));
 
             auto buf_input_0 = input_buffer_.input_range().hi();
 
-            auto remainder = input_buffer_.append
-                                 (DCircularBuffer::const_span_type::from_cstr(input_cstr));
-            auto remainder2 = input_buffer_.append
-                                  (DCircularBuffer::const_span_type::from_cstr("\n"));
+            auto remainder = input_buffer_.append(input_ext);
+            auto remainder2 = input_buffer_.append(span_type::from_cstr("\n"));
+            //(DCircularBuffer::const_span_type::from_cstr("\n"));
 
             if (!remainder.empty() || !remainder2.empty()) {
                 throw std::runtime_error(tostr("Tokenizer::buffer_line: line too long!",
@@ -636,10 +635,10 @@ namespace xo {
 
             auto buf_input_1 = input_buffer_.input_range().hi();
 
-            span_type input = span_type(buf_input_0,
-                                        buf_input_1);
+            span_type input_ours = span_type(buf_input_0,
+                                             buf_input_1);
 
-            return this->input_state_.capture_current_line(input, eof_flag);
+            return this->input_state_.capture_current_line(input_ours, eof_flag);
         }
 
         auto

From 7432a0bd1d75350e640d18d50c76d6c8bddc9dc8 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 23 Jan 2026 14:57:43 -0500
Subject: [PATCH 12/33] xo-reader2: readerreplxx works + streamline debugging

---
 include/xo/tokenizer2/Tokenizer.hpp |  8 ++---
 src/tokenizer2/Tokenizer.cpp        | 51 ++++++++++++-----------------
 2 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
index 69843a5a..3dc6da11 100644
--- a/include/xo/tokenizer2/Tokenizer.hpp
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -109,19 +109,19 @@ namespace xo {
             static bool is_2char_punctuation(CharT ch);
 
             /** assemble token from text @p token_text.
-             *  @p initial_whitespace   Amount of whitespace input being consumed from input.
+             *  @p ws_span  whitespace preceding token
              *  @p token_text subset of input_line representing a single token.
              *  @p p_input_state input state containing input_line.  On exit current line cleared
              *                   if error
              *
              *  retval.consumed will represent some possibly-empty prefix of @p input
              **/
-            static scan_result assemble_token(std::size_t initial_whitespace,
-                                              const span_type & token_text,
+            static scan_result assemble_token( span_type ws_span,
+                                              span_type token_text,
                                               TkInputState * p_input_state);
 
             /** degenerate version of assemble_token() on reaching end-of-file **/
-            static scan_result assemble_final_token(const span_type & token_text,
+            static scan_result assemble_final_token(span_type token_text,
                                                     TkInputState * p_input_state);
 
             /** true if tokenizer contains stored prefix of
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 2784072a..c79e10c3 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -110,8 +110,8 @@ namespace xo {
         }
 
         auto
-        Tokenizer::assemble_token(std::size_t initial_whitespace,
-                                  const span_type & token_text,
+        Tokenizer::assemble_token(span_type ws_span,
+                                  span_type token_text,
                                   TkInputState * p_input_state) -> result_type
         {
             /* literal|pretty|streamlined */
@@ -119,7 +119,7 @@ namespace xo {
 
             scope log(XO_DEBUG(p_input_state->debug_flag()));
             log && log(xtag("token_text", token_text),
-                       xtag("initial_whitespace", initial_whitespace),
+                       xtag("initial_whitespace", ws_span.size()),
                        xtag("input_state", *p_input_state));
 
             tokentype tk_type = tokentype::tk_invalid;
@@ -598,18 +598,16 @@ namespace xo {
             // TOOD: report tk_text as span,
             //       but must pin / unpin
 
-            /* input.prefix(0):
-             * require caller preserves current input line until it's entirely exhausted
-             */
             return result_type(Token(tk_type, std::move(tk_text)),
-                               p_input_state->current_line().prefix(0));
+                               span_type::concat(ws_span,
+                                                 span_type(tk_start, tk_end)));
         } /*assemble_token*/
 
         auto
-        Tokenizer::assemble_final_token(const span_type & token_text,
+        Tokenizer::assemble_final_token(span_type token_text,
                                         TkInputState * p_input_state) -> result_type
         {
-            return assemble_token(0 /*initial_whitespace*/,
+            return assemble_token(token_text.prefix(0) /*ws_span*/,
                                   token_text,
                                   p_input_state);
         }
@@ -645,6 +643,7 @@ namespace xo {
         Tokenizer::scan(const span_type & input) -> result_type
         {
             scope log(XO_DEBUG(input_state_.debug_flag()));
+            log && log(xtag("input", input));
 
             /* - Always at beginning of token when scan() invoked
              * - scan will not report any portion of line as consumed until it has
@@ -659,12 +658,14 @@ namespace xo {
             const CharT * ix = this->input_state_.skip_leading_whitespace();
 
             if(ix == input.hi()) {
-                log && log("end input -> consume current line");
+                log && log("end buffered input -> consume current line");
 
                 /* entirety of current line has been tokenized
                  *  -> caller may consume it
                  */
-                return result_type::make_whitespace(this->input_state_.consume_current_line());
+                this->input_state_.consume_current_line();
+
+                return result_type::make_whitespace(input);
             }
 
             /* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */
@@ -697,27 +698,17 @@ namespace xo {
 
                 ++ix;
 
-#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit
-                if (ix == input.hi()) {
-                    /* need more input to know if/when token complete */
-                    this->prefix_ += std::string(tk_start, input.hi());
+                CharT ch2 = *ix;
 
-                    log && log(xtag("captured-prefix1", this->prefix_));
-                } else
-#endif
-                    {
-                    CharT ch2 = *ix;
-
-                    if (((ch2 >= '0') && (ch2 <= '9'))
-                        || ((ch2 >= 'A') && (ch2 <= 'Z'))
-                        || ((ch2 >= 'a') && (ch2 <= 'z')))
+                if (((ch2 >= '0') && (ch2 <= '9'))
+                    || ((ch2 >= 'A') && (ch2 <= 'Z'))
+                    || ((ch2 >= 'a') && (ch2 <= 'z')))
                     {
                         /* treat as 1 char punctuation */
                         ;
                     } else {
-                        /* include next char */
-                        ++ix;
-                    }
+                    /* include next char */
+                    ++ix;
                 }
             } else if (*ix == '"') {
                 bool complete_flag = false;
@@ -779,7 +770,7 @@ namespace xo {
 
                             this->input_state_.advance_until(ix);
 
-                            return assemble_token(whitespace_z,
+                            return assemble_token(span_type(input.lo(), tk_start),
                                                   span_type(tk_start, ix) /*token*/,
                                                   &(this->input_state_));
                         }
@@ -803,7 +794,7 @@ namespace xo {
                             this->input_state_.advance_until(ix);
 
                             /* ignore next char and complete token */
-                            return assemble_token(whitespace_z,
+                            return assemble_token(span_type(input.lo(), tk_start),
                                                   span_type(tk_start, ix) /*token*/,
                                                   &(this->input_state_));
                         }
@@ -854,7 +845,7 @@ namespace xo {
 
             this->input_state_.advance_until(ix);
 
-            return assemble_token(whitespace_z,
+            return assemble_token(span_type(input.lo(), tk_start),
                                   span_type(tk_start, ix) /*token*/,
                                   &(this->input_state_));
         } /*_scan_aux*/

From f0cd32c05f3c2ac1d7e3d29eb520215ba4958b55 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Tue, 27 Jan 2026 15:50:10 -0500
Subject: [PATCH 13/33] xo-reader2: support if-then-else expressions. +
 detailed utest

---
 include/xo/tokenizer2/Token.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index cc6e13d9..38e73902 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -137,6 +137,8 @@ namespace xo {
             static Token lambda() { return Token(tokentype::tk_lambda); }
             /** token representing keyword @c if **/
             static Token if_token() { return Token(tokentype::tk_if); }
+            /** token representing keyword @c then **/
+            static Token then_token() { return Token(tokentype::tk_then); }
             /** token representing keyword @c else **/
             static Token else_token() { return Token(tokentype::tk_else); }
             /** token representing keyword @c let **/

From a28b45e4f2c51d7df9b1397666e9d1d6db808d5d Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 28 Jan 2026 10:57:55 -0500
Subject: [PATCH 14/33] xo-reader2 xo-expression2: + DLambdaSsm [WIP]

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 38e73902..f9807d05 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -134,7 +134,7 @@ namespace xo {
             /** token representing keyword @c def **/
             static Token def_token() { return Token(tokentype::tk_def); }
             /** token representing keyword @c lambda **/
-            static Token lambda() { return Token(tokentype::tk_lambda); }
+            static Token lambda_token() { return Token(tokentype::tk_lambda); }
             /** token representing keyword @c if **/
             static Token if_token() { return Token(tokentype::tk_if); }
             /** token representing keyword @c then **/

From 0f4e270707dc8a2278d70211b4a4d9b996a58c0f Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 28 Jan 2026 17:40:57 -0500
Subject: [PATCH 15/33] xo-reader2: + DExpectFormalArgSsm [WIP]

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index f9807d05..55a2d57d 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -92,7 +92,7 @@ namespace xo {
             /** token representing right angle bracket @c ">" **/
             static Token rightangle() { return Token(tokentype::tk_rightangle); }
             /** token representing left parenthesis @c "(" **/
-            static Token leftparen() { return Token(tokentype::tk_leftparen); }
+            static Token leftparen_token() { return Token(tokentype::tk_leftparen); }
             /** Token representing right parenthesis @c ")" **/
             static Token rightparen() { return Token(tokentype::tk_rightparen); }
             /** token representing left bracket @c "[" **/

From 83d210b96824d863690b2dffd5f5fddb215aad65 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Thu, 29 Jan 2026 13:48:24 -0500
Subject: [PATCH 16/33] xo-reader2: DExpectFormalArglistSsm parses multiple
 formals

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 55a2d57d..607bc0a4 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -106,7 +106,7 @@ namespace xo {
             /** token representing period @c "." **/
             static Token dot() { return Token(tokentype::tk_dot); }
             /** token representing comma @c "," **/
-            static Token comma() { return Token(tokentype::tk_comma); }
+            static Token comma_token() { return Token(tokentype::tk_comma); }
             /** token representing colon @c ":" **/
             static Token colon_token() { return Token(tokentype::tk_colon); }
             /** token representing double-colo @c "::" **/

From 900d675caac058e24e5113d78dc5e7d99c99be93 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 30 Jan 2026 10:26:35 -0500
Subject: [PATCH 17/33] xo-expression2 xo-reader2: local symtab stack in PSM

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 607bc0a4..66427c3e 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -94,7 +94,7 @@ namespace xo {
             /** token representing left parenthesis @c "(" **/
             static Token leftparen_token() { return Token(tokentype::tk_leftparen); }
             /** Token representing right parenthesis @c ")" **/
-            static Token rightparen() { return Token(tokentype::tk_rightparen); }
+            static Token rightparen_token() { return Token(tokentype::tk_rightparen); }
             /** token representing left bracket @c "[" **/
             static Token leftbracket() { return Token(tokentype::tk_leftbracket); }
             /** token representing right bracket @c "]" **/

From e3006f32666983e2040811f5512a1b93d9b35f15 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 30 Jan 2026 12:41:09 -0500
Subject: [PATCH 18/33] xo-reader2: + assemble lambda function type in
 DLambdaSsm

---
 include/xo/tokenizer2/Token.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 66427c3e..fc5dfc9a 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -100,7 +100,7 @@ namespace xo {
             /** token representing right bracket @c "]" **/
             static Token rightbracket() { return Token(tokentype::tk_rightbracket); }
             /** token representing left brace @c "{" **/
-            static Token leftbrace() { return Token(tokentype::tk_leftbrace); }
+            static Token leftbrace_token() { return Token(tokentype::tk_leftbrace); }
             /** token representing right brace @c "}" **/
             static Token rightbrace() { return Token(tokentype::tk_rightbrace); }
             /** token representing period @c "." **/
@@ -117,8 +117,8 @@ namespace xo {
             static Token singleassign_token() { return Token(tokentype::tk_singleassign); }
             /** token representing unrestricted assignment @c ":=" **/
             static Token assign_token() { return Token(tokentype::tk_assign); }
-            /** token representing indirection @c "->" **/
-            static Token yields() { return Token(tokentype::tk_yields); }
+            /** token representing indirection @c "->" / function return type  **/
+            static Token yields_token() { return Token(tokentype::tk_yields); }
 
             /** token for @c "+" **/
             static Token plus_token() { return Token(tokentype::tk_plus); }

From 75b9e62c842cf1777e7dab1236256dc65df11826 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sat, 31 Jan 2026 21:33:39 -0500
Subject: [PATCH 19/33] xo-reader2: DLambdaSsm work towards producing
 DLambdaExpr [WIP]

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index fc5dfc9a..ab3f0cb7 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -102,7 +102,7 @@ namespace xo {
             /** token representing left brace @c "{" **/
             static Token leftbrace_token() { return Token(tokentype::tk_leftbrace); }
             /** token representing right brace @c "}" **/
-            static Token rightbrace() { return Token(tokentype::tk_rightbrace); }
+            static Token rightbrace_token() { return Token(tokentype::tk_rightbrace); }
             /** token representing period @c "." **/
             static Token dot() { return Token(tokentype::tk_dot); }
             /** token representing comma @c "," **/

From 415a382442e496d2ac04fd2942aa68f84e9ee1a0 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Mon, 2 Feb 2026 21:55:34 -0500
Subject: [PATCH 20/33] xo-interpreter2: scaffold repl + alloc measurement
 framework

---
 include/xo/tokenizer2/Tokenizer.hpp |  6 ++++++
 src/tokenizer2/Tokenizer.cpp        | 13 +++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
index 3dc6da11..83015c03 100644
--- a/include/xo/tokenizer2/Tokenizer.hpp
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -61,6 +61,7 @@ namespace xo {
             using error_type = TokenizerError;
             using DCircularBuffer = xo::mm::DCircularBuffer;
             using CircularBufferConfig = xo::mm::CircularBufferConfig;
+            using MemorySizeInfo = xo::mm::MemorySizeInfo;
             using span_type = xo::mm::span<const CharT>;
             //using input_state_type = TkInputState;
             using result_type = scan_result;
@@ -90,6 +91,11 @@ namespace xo {
             const TkInputState & input_state() const { return input_state_; }
 #pragma GCC diagnostic pop
 
+            /** number of distinct memory pools owned by tokenizer **/
+            std::size_t _n_store() const noexcept;
+            /** memory consumption for i'th memory pool **/
+            MemorySizeInfo _store_info(std::size_t i) const noexcept;
+
             ///@}
 
             /** @defgroup tokenizer-general-methods tokenizer methods **/
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index c79e10c3..c36d85a5 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -6,6 +6,7 @@
 #include "Tokenizer.hpp"
 
 namespace xo {
+    using xo::mm::MemorySizeInfo;
     using std::byte;
 
     namespace scm {
@@ -21,6 +22,18 @@ namespace xo {
             this->input_state_.discard_current_line();
         }
 
+        std::size_t
+        Tokenizer::_n_store() const noexcept
+        {
+            return input_buffer_._n_store();
+        }
+
+        MemorySizeInfo
+        Tokenizer::_store_info(std::size_t i) const noexcept
+        {
+            return input_buffer_._store_info(i);
+        }
+
         bool
         Tokenizer::is_1char_punctuation(CharT ch)
         {

From 3f5bd39ed8e5a38a7d44052c7be29b73a62d5de3 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Tue, 3 Feb 2026 01:05:36 -0500
Subject: [PATCH 21/33] xo-interpreter2 .. xo-arena. memory pool introspection

---
 include/xo/tokenizer2/Tokenizer.hpp |  8 +++-----
 src/tokenizer2/Tokenizer.cpp        | 12 +++---------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/include/xo/tokenizer2/Tokenizer.hpp b/include/xo/tokenizer2/Tokenizer.hpp
index 83015c03..0a212a8f 100644
--- a/include/xo/tokenizer2/Tokenizer.hpp
+++ b/include/xo/tokenizer2/Tokenizer.hpp
@@ -61,7 +61,7 @@ namespace xo {
             using error_type = TokenizerError;
             using DCircularBuffer = xo::mm::DCircularBuffer;
             using CircularBufferConfig = xo::mm::CircularBufferConfig;
-            using MemorySizeInfo = xo::mm::MemorySizeInfo;
+            using MemorySizeVisitor = xo::mm::MemorySizeVisitor;
             using span_type = xo::mm::span<const CharT>;
             //using input_state_type = TkInputState;
             using result_type = scan_result;
@@ -91,10 +91,8 @@ namespace xo {
             const TkInputState & input_state() const { return input_state_; }
 #pragma GCC diagnostic pop
 
-            /** number of distinct memory pools owned by tokenizer **/
-            std::size_t _n_store() const noexcept;
-            /** memory consumption for i'th memory pool **/
-            MemorySizeInfo _store_info(std::size_t i) const noexcept;
+            /** visit tokenizer-owned memory pools; invoke visitor(info) for each one **/
+            void visit_pools(const MemorySizeVisitor & visitor) const;
 
             ///@}
 
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index c36d85a5..f176a88f 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -22,16 +22,10 @@ namespace xo {
             this->input_state_.discard_current_line();
         }
 
-        std::size_t
-        Tokenizer::_n_store() const noexcept
+        void
+        Tokenizer::visit_pools(const MemorySizeVisitor & visitor) const
         {
-            return input_buffer_._n_store();
-        }
-
-        MemorySizeInfo
-        Tokenizer::_store_info(std::size_t i) const noexcept
-        {
-            return input_buffer_._store_info(i);
+            input_buffer_.visit_pools(visitor);
         }
 
         bool

From 0baa458c5b2770c8ff5bb49f35c0426237d04abf Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 13 Feb 2026 17:24:23 -0500
Subject: [PATCH 22/33] xo-reader2 stack: handle comparison expression (x == y)

---
 include/xo/tokenizer2/Token.hpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index ab3f0cb7..b211f967 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -129,6 +129,9 @@ namespace xo {
             /** token for @c "/" **/
             static Token slash_token() { return Token(tokentype::tk_slash); }
 
+            /** token for @c "==" **/
+            static Token cmpeq_token() { return Token(tokentype::tk_cmpeq); }
+
             /** token representing keyword @c type **/
             static Token type() { return Token(tokentype::tk_type); }
             /** token representing keyword @c def **/

From 6d039c03e6420168003aa48b19f459ef78b9578a Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 27 Feb 2026 19:38:53 +1100
Subject: [PATCH 23/33] xo-cmake: set up to make share target available via
 cmake install

---
 cmake/xo_tokenizer2Config.cmake.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmake/xo_tokenizer2Config.cmake.in b/cmake/xo_tokenizer2Config.cmake.in
index 13f1dac1..eccd2745 100644
--- a/cmake/xo_tokenizer2Config.cmake.in
+++ b/cmake/xo_tokenizer2Config.cmake.in
@@ -10,4 +10,5 @@ find_dependency(xo_arena)
 find_dependency(indentlog)
 
 include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Share.cmake")
 check_required_components("@PROJECT_NAME@")

From 9920812d4297b0f63abe160264ad1a5c707ca4d8 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sun, 1 Mar 2026 13:06:57 +1100
Subject: [PATCH 24/33] xo-reader2 stack: + #q token + QuoteSsm [WIP - not
 functional]

---
 include/xo/tokenizer2/Token.hpp     |  2 ++
 include/xo/tokenizer2/tokentype.hpp |  3 +++
 src/tokenizer2/Tokenizer.cpp        | 13 +++++++++++++
 src/tokenizer2/tokentype.cpp        |  5 +++--
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index b211f967..9ddd0181 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -87,6 +87,8 @@ namespace xo {
             static Token symbol_token(const std::string & txt) {
                 return Token(tokentype::tk_symbol, txt);
             }
+            /** token representing quote @c "'" **/
+            static Token quote() { return Token(tokentype::tk_quote); }
             /** token representing left angle bracket @c "<" **/
             static Token leftangle() { return Token(tokentype::tk_leftangle); }
             /** token representing right angle bracket @c ">" **/
diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index 91cb3622..3f259f8d 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -64,6 +64,9 @@ namespace xo {
             /** a symbol **/
             tk_symbol,
 
+            /** quote @c ' (tokenizer also maps the spelling @c #q to this type) **/
+            tk_quote,
+
             /** left-hand parenthesis @c '(' **/
             tk_leftparen,
 
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index f176a88f..8821cd65 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -32,6 +32,8 @@ namespace xo {
         Tokenizer::is_1char_punctuation(CharT ch)
         {
             switch(ch) {
+            case '\'':
+                return true;
             case '(':
                 return true;
             case ')':
@@ -418,6 +420,15 @@ namespace xo {
 
                 break;
             }
+            case '\'':
+            {
+                log && log("quote token");
+
+                tk_type = tokentype::tk_quote;
+                ++ix;
+
+                break;
+            }
             case 'a': case 'A':
             case 'b': case 'B':
             case 'c': case 'C':
@@ -593,6 +604,8 @@ namespace xo {
                     tk_type = tokentype::tk_in;
                 } else if (tk_text == "end") {
                     tk_type = tokentype::tk_end;
+                } else if (tk_text == "#q") {
+                    tk_type = tokentype::tk_quote;
                 } else {
                     /* keep as symbol */
                     keep_text = true;
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index 40c2dbfb..0831940f 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -18,16 +18,17 @@ namespace xo {
                 CASE(tk_f64);
                 CASE(tk_string);
                 CASE(tk_symbol);
-                CASE(tk_leftparen);
 
+                CASE(tk_quote);
+                CASE(tk_leftparen);
                 CASE(tk_rightparen);
                 CASE(tk_leftbracket);
                 CASE(tk_rightbracket);
                 CASE(tk_leftbrace);
                 CASE(tk_rightbrace);
-
                 CASE(tk_leftangle);
                 CASE(tk_rightangle);
+
                 CASE(tk_lessequal);
                 CASE(tk_greatequal);
                 CASE(tk_dot);

From 650a9fa95f1851dc92720a289470ab37ec44ac8f Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Mon, 2 Mar 2026 11:05:12 +1100
Subject: [PATCH 25/33] xo-interpreter2 stack: handle operator expressions w/
 qliterals

---
 include/xo/tokenizer2/Token.hpp | 2 +-
 src/tokenizer2/Tokenizer.cpp    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 9ddd0181..9c748933 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -88,7 +88,7 @@ namespace xo {
                 return Token(tokentype::tk_symbol, txt);
             }
             /** token representing quote @c "'" **/
-            static Token quote() { return Token(tokentype::tk_quote); }
+            static Token quote_token() { return Token(tokentype::tk_quote); }
             /** token representing left angle bracket @c "<" **/
             static Token leftangle() { return Token(tokentype::tk_leftangle); }
             /** token representing right angle bracket @c ">" **/
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 8821cd65..323c2d8d 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -429,6 +429,7 @@ namespace xo {
 
                 break;
             }
+            case '#':
             case 'a': case 'A':
             case 'b': case 'B':
             case 'c': case 'C':

From 74642dfcfafc540e99a1b92a710f802025ebd91d Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Tue, 3 Mar 2026 12:12:09 +1100
Subject: [PATCH 26/33] xo-interpreter2 stack: + literal array parsing

---
 include/xo/tokenizer2/Token.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 9c748933..5f8be733 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -98,9 +98,9 @@ namespace xo {
             /** Token representing right parenthesis @c ")" **/
             static Token rightparen_token() { return Token(tokentype::tk_rightparen); }
             /** token representing left bracket @c "[" **/
-            static Token leftbracket() { return Token(tokentype::tk_leftbracket); }
+            static Token leftbracket_token() { return Token(tokentype::tk_leftbracket); }
             /** token representing right bracket @c "]" **/
-            static Token rightbracket() { return Token(tokentype::tk_rightbracket); }
+            static Token rightbracket_token() { return Token(tokentype::tk_rightbracket); }
             /** token representing left brace @c "{" **/
             static Token leftbrace_token() { return Token(tokentype::tk_leftbrace); }
             /** token representing right brace @c "}" **/

From 458fd04ca2b89c48673dd216d34ff930bd0d065b Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 11 Mar 2026 07:49:14 -0500
Subject: [PATCH 27/33] xo-reader2 stack: expand symbol table to store typedefs

+ typedef utest
+ misc qol policy choices
---
 include/xo/tokenizer2/Token.hpp     | 6 ++++--
 include/xo/tokenizer2/tokentype.hpp | 3 +++
 src/tokenizer2/Tokenizer.cpp        | 2 ++
 src/tokenizer2/tokentype.cpp        | 1 +
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 5f8be733..c7c43287 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -111,8 +111,8 @@ namespace xo {
             static Token comma_token() { return Token(tokentype::tk_comma); }
             /** token representing colon @c ":" **/
             static Token colon_token() { return Token(tokentype::tk_colon); }
-            /** token representing double-colo @c "::" **/
-            static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
+            /** token representing double-colon @c "::" **/
+            static Token doublecolon_token() { return Token(tokentype::tk_doublecolon); }
             /** token representing semicolon @c ";" **/
             static Token semicolon_token() { return Token(tokentype::tk_semicolon); }
             /** token representing single-assignment @c "=" (editor bait: equal_token) **/
@@ -138,6 +138,8 @@ namespace xo {
             static Token type() { return Token(tokentype::tk_type); }
             /** token representing keyword @c def **/
             static Token def_token() { return Token(tokentype::tk_def); }
+            /** token representing keyword @c deftype **/
+            static Token deftype_token() { return Token(tokentype::tk_deftype); }
             /** token representing keyword @c lambda **/
             static Token lambda_token() { return Token(tokentype::tk_lambda); }
             /** token representing keyword @c if **/
diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index 3f259f8d..d0290b05 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -146,6 +146,9 @@ namespace xo {
             /** keyword @c 'def' **/
             tk_def,
 
+            /** keyword @c 'deftype' **/
+            tk_deftype,
+
             /** keyword @c 'lambda' **/
             tk_lambda,
 
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 323c2d8d..f6ac7c2f 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -591,6 +591,8 @@ namespace xo {
                     tk_type = tokentype::tk_type;
                 } else if (tk_text == "def") {
                     tk_type = tokentype::tk_def;
+                } else if (tk_text == "deftype") {
+                    tk_type = tokentype::tk_deftype;
                 } else if (tk_text == "lambda") {
                     tk_type = tokentype::tk_lambda;
                 } else if (tk_text == "if") {
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index 0831940f..7df59eec 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -51,6 +51,7 @@ namespace xo {
 
                 CASE(tk_type);
                 CASE(tk_def);
+                CASE(tk_deftype);
                 CASE(tk_lambda);
                 CASE(tk_if);
                 CASE(tk_then);

From caa8e31d025ad962ca34e31fc5597ea0e076af26 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 11 Mar 2026 14:13:48 -0500
Subject: [PATCH 28/33] xo-reader2: parse list types + utest

---
 include/xo/tokenizer2/Token.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index c7c43287..7537a2b0 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -90,9 +90,9 @@ namespace xo {
             /** token representing quote @c "'" **/
             static Token quote_token() { return Token(tokentype::tk_quote); }
             /** token representing left angle bracket @c "<" **/
-            static Token leftangle() { return Token(tokentype::tk_leftangle); }
+            static Token leftangle_token() { return Token(tokentype::tk_leftangle); }
             /** token representing right angle bracket @c ">" **/
-            static Token rightangle() { return Token(tokentype::tk_rightangle); }
+            static Token rightangle_token() { return Token(tokentype::tk_rightangle); }
             /** token representing left parenthesis @c "(" **/
             static Token leftparen_token() { return Token(tokentype::tk_leftparen); }
             /** Token representing right parenthesis @c ")" **/

From 6f95f38373e4c8d0bcf6b3de106d1422e56d2845 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Wed, 11 Mar 2026 16:19:40 -0500
Subject: [PATCH 29/33] xo-interpreter2: + nil + cons

---
 include/xo/tokenizer2/Token.hpp     | 2 ++
 include/xo/tokenizer2/tokentype.hpp | 3 +++
 src/tokenizer2/Tokenizer.cpp        | 2 ++
 src/tokenizer2/tokentype.cpp        | 1 +
 4 files changed, 8 insertions(+)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 7537a2b0..0968a9e9 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -134,6 +134,8 @@ namespace xo {
             /** token for @c "==" **/
             static Token cmpeq_token() { return Token(tokentype::tk_cmpeq); }
 
+            /** token representing keyword @c nil **/
+            static Token nil_token() { return Token(tokentype::tk_nil); }
             /** token representing keyword @c type **/
             static Token type() { return Token(tokentype::tk_type); }
             /** token representing keyword @c def **/
diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index d0290b05..5f7e1937 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -140,6 +140,9 @@ namespace xo {
             /** operator @c '!=' **/
             tk_cmpne,
 
+            /** keyword @c 'nil' **/
+            tk_nil,
+
             /** keyword @c 'type' **/
             tk_type,
 
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index f6ac7c2f..2a6f8ed9 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -587,6 +587,8 @@ namespace xo {
                 if ((tk_text == "true") || (tk_text == "false")) {
                     tk_type = tokentype::tk_bool;
                     keep_text = true;
+                } else if (tk_text == "nil") {
+                    tk_type = tokentype::tk_nil;
                 } else if (tk_text == "type") {
                     tk_type = tokentype::tk_type;
                 } else if (tk_text == "def") {
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index 7df59eec..b8a013da 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -49,6 +49,7 @@ namespace xo {
                 CASE(tk_cmpeq);
                 CASE(tk_cmpne);
 
+                CASE(tk_nil);
                 CASE(tk_type);
                 CASE(tk_def);
                 CASE(tk_deftype);

From 41f704f7ab484a94154d82ba3c4919affeda7f4e Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Thu, 12 Mar 2026 21:08:58 -0500
Subject: [PATCH 30/33] xo-reader2 stack: support op<=

---
 include/xo/tokenizer2/tokentype.hpp | 2 +-
 src/tokenizer2/Tokenizer.cpp        | 2 +-
 src/tokenizer2/tokentype.cpp        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index 5f7e1937..18857af9 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -92,7 +92,7 @@ namespace xo {
             tk_rightangle,
 
             /** less-equal @c '<=' **/
-            tk_lessequal,
+            tk_cmple,
 
             /** great-equal @c '>=' **/
             tk_greatequal,
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 2a6f8ed9..76881e7f 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -486,7 +486,7 @@ namespace xo {
                 log && log("leftangle or lessequal token");
 
                 if (*(ix + 1) == '=') {
-                    tk_type = tokentype::tk_lessequal;
+                    tk_type = tokentype::tk_cmple;
                     ++ix;
                     ++ix;
                 } else {
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index b8a013da..e704b38d 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -29,7 +29,7 @@ namespace xo {
                 CASE(tk_leftangle);
                 CASE(tk_rightangle);
 
-                CASE(tk_lessequal);
+                CASE(tk_cmple);
                 CASE(tk_greatequal);
                 CASE(tk_dot);
                 CASE(tk_comma);

From 4e7b58e3c95d0726d777f5310fd4eca5f5f93b76 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Thu, 12 Mar 2026 23:41:21 -0500
Subject: [PATCH 31/33] xo-reader2: + op>= support

---
 include/xo/tokenizer2/tokentype.hpp | 2 +-
 src/tokenizer2/Tokenizer.cpp        | 2 +-
 src/tokenizer2/tokentype.cpp        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/xo/tokenizer2/tokentype.hpp b/include/xo/tokenizer2/tokentype.hpp
index 18857af9..a5538131 100644
--- a/include/xo/tokenizer2/tokentype.hpp
+++ b/include/xo/tokenizer2/tokentype.hpp
@@ -95,7 +95,7 @@ namespace xo {
             tk_cmple,
 
             /** great-equal @c '>=' **/
-            tk_greatequal,
+            tk_cmpge,
 
             /** dot @c '.' **/
             tk_dot,
diff --git a/src/tokenizer2/Tokenizer.cpp b/src/tokenizer2/Tokenizer.cpp
index 76881e7f..88f03755 100644
--- a/src/tokenizer2/Tokenizer.cpp
+++ b/src/tokenizer2/Tokenizer.cpp
@@ -500,7 +500,7 @@ namespace xo {
                 log && log("rightangle or greatequal token");
 
                 if (*(ix + 1) == '=') {
-                    tk_type = tokentype::tk_greatequal;
+                    tk_type = tokentype::tk_cmpge;
                     ++ix;
                     ++ix;
                 } else {
diff --git a/src/tokenizer2/tokentype.cpp b/src/tokenizer2/tokentype.cpp
index e704b38d..c9749a34 100644
--- a/src/tokenizer2/tokentype.cpp
+++ b/src/tokenizer2/tokentype.cpp
@@ -30,7 +30,7 @@ namespace xo {
                 CASE(tk_rightangle);
 
                 CASE(tk_cmple);
-                CASE(tk_greatequal);
+                CASE(tk_cmpge);
                 CASE(tk_dot);
                 CASE(tk_comma);
                 CASE(tk_colon);

From 1d3af64a7a09589f7b9a2be0d0ac66d737114ab0 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Tue, 24 Mar 2026 23:32:09 -0400
Subject: [PATCH 32/33] xo-tokenizer2: + op!= utest

---
 include/xo/tokenizer2/Token.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/xo/tokenizer2/Token.hpp b/include/xo/tokenizer2/Token.hpp
index 0968a9e9..f2d8eb08 100644
--- a/include/xo/tokenizer2/Token.hpp
+++ b/include/xo/tokenizer2/Token.hpp
@@ -133,6 +133,8 @@ namespace xo {
 
             /** token for @c "==" **/
             static Token cmpeq_token() { return Token(tokentype::tk_cmpeq); }
+            /** token for @c "!=" **/
+            static Token cmpne_token() { return Token(tokentype::tk_cmpne); }
 
             /** token representing keyword @c nil **/
             static Token nil_token() { return Token(tokentype::tk_nil); }

From ff471bbc72eb51533a03d73f6c623697e1d8e7a3 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Fri, 27 Mar 2026 11:16:28 -0400
Subject: [PATCH 33/33] xo-interpreter2 stack: wrap TokenizerError as
 DRuntimeError

Also fix _read_eval_print() to report them!
---
 cmake/xo_tokenizer2Config.cmake.in       |  1 +
 include/xo/tokenizer2/TokenizerError.hpp |  6 ++++++
 src/tokenizer2/CMakeLists.txt            |  1 +
 src/tokenizer2/TokenizerError.cpp        | 14 ++++++++++++++
 4 files changed, 22 insertions(+)

diff --git a/cmake/xo_tokenizer2Config.cmake.in b/cmake/xo_tokenizer2Config.cmake.in
index eccd2745..0c0dad0b 100644
--- a/cmake/xo_tokenizer2Config.cmake.in
+++ b/cmake/xo_tokenizer2Config.cmake.in
@@ -6,6 +6,7 @@ include(CMakeFindDependencyMacro)
 #       must coordinate with xo_dependency() calls
 #       in src/tokenizer2/CMakeLists.txt
 #
+find_dependency(xo_stringtable2)
 find_dependency(xo_arena)
 find_dependency(indentlog)
 
diff --git a/include/xo/tokenizer2/TokenizerError.hpp b/include/xo/tokenizer2/TokenizerError.hpp
index bf7702b1..b08889bd 100644
--- a/include/xo/tokenizer2/TokenizerError.hpp
+++ b/include/xo/tokenizer2/TokenizerError.hpp
@@ -8,6 +8,8 @@
 #include "TkInputState.hpp"
 #include "tokentype.hpp"
 #include "span.hpp"
+#include <xo/stringtable2/String.hpp>
+#include <xo/alloc2/Allocator.hpp>
 #include <iomanip>
 
 namespace xo {
@@ -19,6 +21,7 @@ namespace xo {
          **/
         class TokenizerError {
         public:
+            using AAllocator = xo::mm::AAllocator;
             using CharT = char;
             using span_type = xo::mm::span<const CharT>;
 
@@ -89,6 +92,9 @@ namespace xo {
             /** Print human-oriented error report on @p os. **/
             void report(std::ostream & os) const;
 
+            /** Like @ref report, but returns the report text as a DString allocated from @p mm **/
+            DString * report_to_string(obj<AAllocator> mm) const;
+
             ///@}
 
         private:
diff --git a/src/tokenizer2/CMakeLists.txt b/src/tokenizer2/CMakeLists.txt
index ccf1b551..3a748e70 100644
--- a/src/tokenizer2/CMakeLists.txt
+++ b/src/tokenizer2/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SELF_SRCS
 
 xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
 # deps must coordinate with xo-tokenizer/cmake/xo_tokenizer2Config.cmake.in
+xo_dependency(${SELF_LIB} xo_stringtable2)
 xo_dependency(${SELF_LIB} xo_arena)
 xo_dependency(${SELF_LIB} indentlog)
 
diff --git a/src/tokenizer2/TokenizerError.cpp b/src/tokenizer2/TokenizerError.cpp
index ffe3c8b4..c80996d9 100644
--- a/src/tokenizer2/TokenizerError.cpp
+++ b/src/tokenizer2/TokenizerError.cpp
@@ -54,6 +54,20 @@ namespace xo {
             }
         }
 
+        DString *
+        TokenizerError::report_to_string(obj<AAllocator> dest_mm) const
+        {
+            // Render report() into a temporary stream, then build a DString from its text via dest_mm.
+            // FIXME: using heap here for scratch space.
+            // Would prefer to checkpoint + realloc.
+
+            std::stringstream ss;
+
+            this->report(ss);
+
+            return DString::from_str(dest_mm, ss.str());
+        }
+
     } /*namespace scm*/
 } /*namespace xo*/