+ xo-tokenizer2 xo-reader2 xo-expression2 xo-interpreter2

2nd gen schematika interpreter using fomo
2026-01-10 12:39:09 -05:00 · 2026-01-10 12:39:09 -05:00 · f7bd3b0db3
commit f7bd3b0db3
parent 95bd3c7fae
41 changed files with 3566 additions and 9 deletions
--- a/xo-tokenizer2/include/xo/tokenizer2/.gitkeep
+++ b/xo-tokenizer2/include/xo/tokenizer2/.gitkeep
--- a/xo-tokenizer2/include/xo/tokenizer2/TkInputState.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/TkInputState.hpp
@ -0,0 +1,230 @@
+/* @file TkInputState.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "span.hpp"
+
+namespace xo {
+    namespace scm {
+        /** Outcome reported by @ref TkInputState::capture_current_line **/
+        enum class input_error {
+            /** normal return, input line successfully identified and captured **/
+            ok = 0,
+            /** incomplete input; should not have been submitted
+             *  to @ref TkInputState::capture_current_line.
+             *  note: submit last line of input with eof_flag=true
+             **/
+            incomplete,
+            N /* trailing sentinel; equals the number of enumerators above */
+        };
+
+        /** @class TkInputState
+         *  @brief Track detailed input position for use in error messages
+         *
+         *  input characters fall into two categories:
+         *  - consumed: memory can be reclaimed/recycled
+         *  - buffered: memory will be retained unaltered until consumed
+         *
+         *  remarks:
+         *  - always in one of two states:
+         *    - empty
+         *    - contains exactly one line of input
+         *  - also record current input position.
+         *    Use this for example to identify where tokenizer rejected input.
+         *  - .current_pos advances by one token
+         *
+         *  - buffered characters always form a single contiguous range.
+         *  - input_state does not own any storage; storage is owned elsewhere
+         *
+         *  @verbatim
+         *
+         *    <------------------.current_line------------------>
+         *                                   >  <-- .whitespace
+         *    cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx
+         *    ^                                 ^                ^
+         *    .current_line.lo                  |                .current_line.hi
+         *                           .current_pos
+         *
+         *    <----prev_line----> <----current_line---->
+         *                                   >  <--whitespace
+         *    ppppppppppppppppppp cccccccccccc__TTTTTTTT
+         *    ^
+         *
+         *  @endverbatim
+         **/
+        class TkInputState {
+        public:
+            /** @defgroup input-state-type-traits input-state type traits **/
+            ///@{
+
+            using CharT = char;
+
+            /** type representing a contiguous span of tokenizer input characters **/
+            using span_type = span<const CharT>;
+
+            ///@}
+
+        public:
+            /** @defgroup input-state-ctors input_state constructors **/
+            ///@{
+
+            TkInputState() = default;
+            explicit TkInputState(bool debug_flag) : debug_flag_{debug_flag} {}
+            /** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
+             *  Introduced for unit tests, not used in tokenizer.
+             **/
+            explicit TkInputState(const span<const CharT>& current_line,
+                                  size_t current_pos,
+                                  size_t whitespace) : current_line_{current_line},
+                                                       current_pos_{current_pos},
+                                                       whitespace_{whitespace} {}
+
+            ///@}
+
+            /** @defgroup input-state-static-methods input_state static methods **/
+            ///@{
+
+            /** recognize the newline character '\n' **/
+            static bool is_newline(CharT ch);
+            /** identifies whitespace chars.
+             *  These are chars that do not belong to any token.
+             *  They are not permitted to appear within
+             *  a symbol or string token.
+             *  Appearance of a whitespace char forces completion of
+             *  preceding token.
+             **/
+            static bool is_whitespace(CharT ch);
+
+            ///@}
+
+            /** @defgroup input-state-access-methods **/
+            ///@{
+
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const span_type & current_line() const { return current_line_; }
+#pragma GCC diagnostic pop
+            size_t tk_start() const { return tk_start_; }
+            size_t current_pos() const { return current_pos_; }
+            size_t whitespace() const { return whitespace_; }
+            bool debug_flag() const { return debug_flag_; }
+
+            ///@}
+
+            /** @defgroup input-state-general-methods **/
+            ///@{
+
+            /** Input state less @p n chars.
+             *  Use to recover input state before a complete but error-triggering token
+             **/
+            TkInputState rewind(std::size_t n) const;
+
+            /** Capture prefix of @p input up to first newline.
+             *  Set read position to start of line.
+             *
+             *  Alters:
+             *    .current_line
+             *    .current_pos
+             *
+             * Return pair comprising error code and input span representing first line
+             * (including trailing newline) from @p input.
+             **/
+            std::pair<input_error, span_type> capture_current_line(const span_type & input,
+                                                                   bool eof_flag);
+
+            /** atomically return current line while discarding it from input state
+             *
+             *  Alters
+             *    .current_line
+             *    .current_pos
+             *    .whitespace
+             **/
+            span_type consume_current_line();
+
+            /** Reset input state for start of next line.
+             *  Expression parser may use this to discard remainder of input line
+             *  after a parsing error.
+             *
+             * Alters:
+             *   .current_line
+             *   .current_pos
+             *   .whitespace
+             **/
+            void discard_current_line();
+
+            /** Advance input position by @p z
+             *
+             *  Alters:
+             *   .current_pos
+             **/
+            void advance(size_t z);
+
+            /** Advance .current_pos to pos.
+             *  Require: pos in @ref current_line_
+             **/
+            void advance_until(const CharT * pos);
+
+            /** Skip prefix of input, starting at current read position,
+             *  comprising only whitespace.
+             *
+             *  Presume input position is at end of token;
+             *  on return @ref whitespace_ counts number of whitespace characters
+             *  skipped.
+             *
+             *  Return pointer to first non-whitespace character after @ref current_pos_
+             *  or @ref current_line_.hi if reached end of buffered line.
+             *
+             *  Alters:
+             *    .whitespace
+             **/
+            const CharT * skip_leading_whitespace();
+
+            ///@}
+
+        private:
+            /** @defgroup input-state-instance-vars input_state instance variables **/
+            ///@{
+
+            /** remember current input line.  Used only to report errors **/
+            span<const CharT> current_line_ = span<const CharT>();
+            /** start of last token within @ref current_line_ **/
+            size_t tk_start_ = 0;
+            /** input position within @ref current_line_ **/
+            size_t current_pos_ = 0;
+            /** number of whitespace chars since end of preceding token,
+             *  or last newline, whichever is less
+             **/
+            size_t whitespace_ = 0;
+
+            /** true to log input activity **/
+            bool debug_flag_ = false;
+
+            ///@}
+        }; /*TkInputState*/
+
+        inline std::ostream &
+        operator<<(std::ostream & os,
+                   const TkInputState & x)
+        { /* write tk_start, current_pos, current_line text and whitespace count of x to os */
+            using xo::print::unq;
+
+            os << "<input_state"
+               << xtag("tk", x.tk_start())
+               << xtag("pos", x.current_pos())
+               << xtag("line",
+                       unq(std::string_view(x.current_line().lo(),
+                                            x.current_line().hi())))
+               << xtag("whitespace", x.whitespace())
+            << ">";
+
+            return os;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TkInputState.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/Token.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/Token.hpp
@ -0,0 +1,226 @@
+/* file Token.hpp
+ *
+ * author: Roland Conybeare, Jul 2024
+ */
+
+#pragma once
+
+#include "tokentype.hpp"
+#include "xo/indentlog/print/tag.hpp"
+#include <stdexcept>
+#include <ostream>
+#include <string>
+#include <cstdint>
+
+namespace xo {
+    namespace scm {
+        namespace detail {
+            /* compute a * b^p by repeated squaring; intended for p >= 0 (for p <= 0 returns a unchanged) */
+            constexpr double
+            pow_aux(double a, double b, int p) {
+                while (p > 0) {
+                    if (p % 2 == 1) {
+                        /* p odd: a * b^p = a * b^(2q + 1) = (a*b) * b^(2q) */
+                        a *= b;
+                        p -= 1;
+                    } else {
+                        /* p even: a * b^p = a * b^(2q) = a * (b^2)^q */
+                        b = b * b;
+                        p /= 2;
+                    }
+                }
+
+                /* a * b^0 = a */
+                return a;
+            }
+
+            constexpr double
+            pow10(int p) { /* 10^p as double; negative p computed as 1 / 10^(-p) */
+                if (p >= 0)
+                    return pow_aux(1.0, 10.0, p);
+                else
+                    return 1.0 / pow_aux(1.0, 10.0, -p);
+            }
+        }
+
+        /** @class Token
+         *  @brief Represent a Schematika lexical token
+         **/
+        class Token {
+        public:
+            /** @defgroup token-ctors token constructors **/
+            ///@{
+
+            /** default ctor creates token with type @c tk_invalid **/
+            Token() = default;
+            /** create token with type @p tk_type and input text @p text **/
+            Token(tokentype tk_type, const std::string & text = "")
+                : tk_type_{tk_type}, text_{text} {}
+
+            /** create invalid token (same as null ctor, but explicit) **/
+            static Token invalid() { return Token(); }
+            /** Create token representing a boolean literal from text @p txt
+             *  @p txt must be @c true or @c false
+             **/
+            static Token bool_token(const std::string & txt) {
+                return Token(tokentype::tk_bool, txt);
+            }
+            /** Create token representing 64-bit signed integer literal parsed from decimal @p txt.
+             *  The string @p txt must be a decimal integer literal, since @ref i64_value re-parses @p txt.
+             **/
+            static Token i64_token(const std::string & txt) {
+                return Token(tokentype::tk_i64, txt);
+            }
+            /** create token representing 64-bit floating-point literal parsed from decimal @p txt
+             *  The string @p txt must be a decimal floating-point literal, since @ref f64_value re-parses @p txt.
+             **/
+            static Token f64_token(const std::string & txt) {
+                return Token(tokentype::tk_f64, txt);
+            }
+            /** create token representing literal string parsed from @p txt **/
+            static Token string_token(const std::string & txt) {
+                return Token(tokentype::tk_string, txt);
+            }
+            /** create token representing a symbol parsed from @p txt.
+             *  Note that not all strings are valid symbol names.
+             **/
+            static Token symbol_token(const std::string & txt) {
+                return Token(tokentype::tk_symbol, txt);
+            }
+            /** token representing left angle bracket @c "<" **/
+            static Token leftangle() { return Token(tokentype::tk_leftangle); }
+            /** token representing right angle bracket @c ">" **/
+            static Token rightangle() { return Token(tokentype::tk_rightangle); }
+            /** token representing left parenthesis @c "(" **/
+            static Token leftparen() { return Token(tokentype::tk_leftparen); }
+            /** token representing right parenthesis @c ")" **/
+            static Token rightparen() { return Token(tokentype::tk_rightparen); }
+            /** token representing left bracket @c "[" **/
+            static Token leftbracket() { return Token(tokentype::tk_leftbracket); }
+            /** token representing right bracket @c "]" **/
+            static Token rightbracket() { return Token(tokentype::tk_rightbracket); }
+            /** token representing left brace @c "{" **/
+            static Token leftbrace() { return Token(tokentype::tk_leftbrace); }
+            /** token representing right brace @c "}" **/
+            static Token rightbrace() { return Token(tokentype::tk_rightbrace); }
+            /** token representing period @c "." **/
+            static Token dot() { return Token(tokentype::tk_dot); }
+            /** token representing comma @c "," **/
+            static Token comma() { return Token(tokentype::tk_comma); }
+            /** token representing colon @c ":" **/
+            static Token colon() { return Token(tokentype::tk_colon); }
+            /** token representing double-colon @c "::" **/
+            static Token doublecolon() { return Token(tokentype::tk_doublecolon); }
+            /** token representing semicolon @c ";" **/
+            static Token semicolon() { return Token(tokentype::tk_semicolon); }
+            /** token representing single-assignment @c "=" **/
+            static Token singleassign() { return Token(tokentype::tk_singleassign); }
+            /** token representing unrestricted assignment @c ":=" **/
+            static Token assign_token() { return Token(tokentype::tk_assign); }
+            /** token representing indirection @c "->" **/
+            static Token yields() { return Token(tokentype::tk_yields); }
+
+            /** token for @c "+" **/
+            static Token plus_token() { return Token(tokentype::tk_plus); }
+            /** token for @c "-" **/
+            static Token minus_token() { return Token(tokentype::tk_minus); }
+            /** token for @c "*" **/
+            static Token star_token() { return Token(tokentype::tk_star); }
+            /** token for @c "/" **/
+            static Token slash_token() { return Token(tokentype::tk_slash); }
+
+            /** token representing keyword @c type **/
+            static Token type() { return Token(tokentype::tk_type); }
+            /** token representing keyword @c def **/
+            static Token def() { return Token(tokentype::tk_def); }
+            /** token representing keyword @c lambda **/
+            static Token lambda() { return Token(tokentype::tk_lambda); }
+            /** token representing keyword @c if **/
+            static Token if_token() { return Token(tokentype::tk_if); }
+            /** token representing keyword @c else **/
+            static Token else_token() { return Token(tokentype::tk_else); }
+            /** token representing keyword @c let **/
+            static Token let() { return Token(tokentype::tk_let); }
+            /** token representing keyword @c in **/
+            static Token in() { return Token(tokentype::tk_in); }
+            /** token representing keyword @c end **/
+            static Token end() { return Token(tokentype::tk_end); }
+
+            ///@}
+
+            /** @defgroup token-access-methods **/
+            ///@{
+
+            tokentype tk_type() const { return tk_type_; }
+            const std::string & text() const { return text_; }
+
+            ///@}
+
+            /** @defgroup token-general-methods **/
+            ///@{
+
+            /** true if token understood to represent valid input
+             *  i.e. any token type except @c tk_invalid
+             **/
+            bool is_valid() const { return tk_type_ != tokentype::tk_invalid; }
+            /** true for sentinel token with type tk_invalid **/
+            bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; }
+
+            /** true for tokens with variable text.  false for those with fixed textual representation **/
+            bool has_variable_text() const { return (tk_type_ == tokentype::tk_i64
+                                                     || tk_type_ == tokentype::tk_f64
+                                                     || tk_type_ == tokentype::tk_string
+                                                     || tk_type_ == tokentype::tk_symbol); }
+
+            /** expect input matching @c true or @c false **/
+            bool bool_value() const;
+
+            /** expect input matching @c [+|-][0-9][0-9]* **/
+            std::int64_t i64_value() const;
+
+            /** expect input matching @c [+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]* **/
+            double f64_value() const;
+
+            /** print human-readable token representation on stream @p os **/
+            void print(std::ostream & os) const;
+
+            ///@}
+
+        private:
+            /** @defgroup token-instance-vars **/
+            ///@{
+
+            /** category for this token **/
+            tokentype tk_type_ = tokentype::tk_invalid;
+
+            /** characters comprising this token.
+             *  only provided for certain token types (note @ref bool_token also stores its text):
+             *
+             *    tk_i64
+             *    tk_f64
+             *    tk_string
+             *    tk_symbol
+             **/
+            std::string text_;
+
+            ///@}
+        };
+
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const Token & tk)
+        { /* stream insertion for Token; delegates to Token::print */
+            tk.print(os);
+            return os;
+        }
+    } /*namespace scm*/
+
+#ifndef ppdetail_atomic
+    namespace print { /* NOTE(review): presumably registers Token as an atomic type for xo pretty-printing -- confirm against ppdetail_atomic.hpp */
+        PPDETAIL_ATOMIC(xo::scm::Token); /* was xo::scm::token<char>: the old template name, not declared in this header */
+    }
+#endif
+
+} /*namespace xo*/
+
+/* end Token.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp
@ -0,0 +1,167 @@
+/* file Tokenizer.hpp
+ *
+ * author: Roland Conybeare, Jul 2024
+ */
+
+#pragma once
+
+#include "Token.hpp"
+#include "TkInputState.hpp"
+#include "span.hpp"
+#include "scan_result.hpp"
+#include "xo/indentlog/scope.hpp"
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <cassert>
+
+namespace xo {
+    namespace scm {
+        /** @class Tokenizer
+         *  @brief Parse a Schematika character stream into lexical tokens
+         *
+         *  Use:
+         *
+         *  @code
+         *    // see xo-tokenizer2/example/tokenrepl/tokenrepl.cpp
+         *    // for exact working code
+         *
+         *    using tokenizer_type = Tokenizer;
+         *    using span_type = tokenizer_type::span_type;
+         *
+         *    tokenizer_type tkz;
+         *    span_type input = ...;
+         *
+         *    while (!input.empty()) {
+         *        auto [tk, consumed, error] = tkz.scan(input, eof_flag);
+         *
+         *        if (tk.is_valid()) {
+         *            // do something with tk
+         *        } else if (error.is_error()) {
+         *            error.report(cout);
+         *            break;
+         *        }
+         *
+         *        input = input.after_prefix(consumed);
+         *    }
+         *
+         *    // NOTE(review): Tokenizer declares no notify_eof();
+         *    // scan() takes an eof_flag argument instead.
+         *    // Presumably the final scan() call passes eof_flag=true
+         *    // so that a trailing token is completed -- confirm
+         *    // against tokenrepl.cpp
+         *
+         *  @endcode
+         *
+         * See tokentype.hpp for token types
+         **/
+        class Tokenizer {
+        public:
+            using CharT = char;
+            using token_type = Token;
+            using error_type = TokenizerError;
+            using span_type = span<const CharT>;
+            using input_state_type = TkInputState;
+            using result_type = scan_result;
+
+        public:
+            /** @defgroup tokenizer-ctors tokenizer constructors **/
+            ///@{
+
+            Tokenizer(bool debug_flag = false);
+
+            ///@}
+
+            /** @defgroup tokenizer-access-methods tokenizer access methods **/
+            ///@{
+
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const TkInputState & input_state() const { return input_state_; }
+#pragma GCC diagnostic pop
+
+            ///@}
+
+            /** @defgroup tokenizer-general-methods tokenizer methods **/
+            ///@{
+
+            /** identifies punctuation chars.
+             *  These are chars that are not permitted to appear within
+             *  a symbol token.  Instead they force completion of
+             *  a preceding token,  and start a new token with themselves
+             **/
+            static bool is_1char_punctuation(CharT ch);
+
+            /** more-relaxed version of is_1char_punctuation.
+             *  Chars that are not permitted to appear within a symbol token,
+             *  but may form token combined with next character
+             **/
+            static bool is_2char_punctuation(CharT ch);
+
+            /** assemble token from text @p token_text.
+             *  @p initial_whitespace   Amount of whitespace input being consumed from input.
+             *  @p token_text subset of input_line representing a single token.
+             *  @p p_input_state input state containing input_line.  On exit current line cleared
+             *                   if error
+             *
+             *  retval.consumed will represent some possibly-empty prefix of @p input
+             **/
+            static scan_result assemble_token(std::size_t initial_whitespace,
+                                              const span_type & token_text,
+                                              TkInputState * p_input_state);
+
+            /** degenerate version of assemble_token() on reaching end-of-file **/
+            static scan_result assemble_final_token(const span_type & token_text,
+                                                    TkInputState * p_input_state);
+
+            /** true if tokenizer contains stored prefix of
+             *  possibly-incomplete token
+             **/
+            bool has_prefix() const { return !prefix_.empty(); }
+
+            /** scan for next input token,  given @p input.
+             *  Note:
+             *  - tokenizer can consume input (e.g. whitespace)
+             *    without completing a token
+             *  - input will remember the extent of the last line of input
+             *    for which parsing has begun, but not completed.
+             *    It's required that at least that portion of the input span
+             *    remain valid across scan() calls
+             *
+             *  @return {parsed token, consumed span}
+             **/
+            scan_result scan(const span_type & input,
+                             bool eof_flag);
+
+            /** discard current line after error.  Just cleans up error-reporting state **/
+            void discard_current_line();
+
+            ///@}
+
+        private:
+            /** @defgroup tokenizer-instance-vars tokenizer instance variables **/
+            ///@{
+
+            /** track input state (line#,pos,..) for error messages.
+             *  There's an ordering problem here:
+             *  1. input_state_.skip_leading_whitespace() advances
+             *     current line automagically when it sees \n
+             *  2. need to capture value of @ref input_state_ _before_ newline
+             *  3. but need newline to end token
+             *  Also recall input_state_type needed for reporting errors.
+             **/
+            input_state_type input_state_;
+            /** Accumulate partial token here.
+             *  This will happen if input sent to @ref Tokenizer::scan
+             *  ends without whitespace such that last available token's
+             *  extent is not determined
+             **/
+            std::string prefix_;
+
+            ///@}
+        }; /*Tokenizer*/
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end Tokenizer.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp
@ -0,0 +1,114 @@
+/* file TokenizerError.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "TkInputState.hpp"
+#include "tokentype.hpp"
+#include "span.hpp"
+#include <iomanip>
+
+namespace xo {
+    namespace scm {
+        /** @class TokenizerError
+         *  @brief represent a lexing error, with context
+         *
+         *  Not a template: @c CharT is a fixed alias for @c char
+         **/
+        class TokenizerError {
+        public:
+            using CharT = char;
+            using span_type = span<const CharT>;
+
+        public:
+            /** @defgroup tokenizer-error-ctors **/
+            ///@{
+
+            /** Default ctor represents a not-an-error sentinel object **/
+            TokenizerError() = default;
+            /** Constructor to capture parsing error context
+             *  @p input_state  input state to associate with error (stored by copy)
+             *  @p error_pos  error location relative to token start
+             **/
+            TokenizerError(const char * src_function,
+                           std::string error_description,
+                           const TkInputState & input_state,
+                           size_t error_pos)
+                : src_function_{src_function},
+                  error_description_{std::move(error_description)},
+                  input_state_{input_state},
+                  error_pos_{error_pos}
+                {
+                    scope log(XO_DEBUG(input_state.debug_flag()));
+
+                    log && log(xtag("input_state.current_pos", input_state.current_pos()),
+                               xtag("error_pos", error_pos));
+                }
+            ///@}
+
+            /** @defgroup tokenizer-error-access-methods **/
+            ///@{
+
+            const char * src_function() const { return src_function_; }
+            const std::string & error_description() const { return error_description_; }
+#pragma GCC diagnostic push
+#ifndef __APPLE__
+#pragma GCC diagnostic ignored "-Wchanges-meaning"
+#endif
+            const TkInputState & input_state() const { return input_state_; }
+#pragma GCC diagnostic pop
+            size_t tk_start() const { return input_state_.current_pos(); } /* NOTE(review): returns current_pos(), not input_state_.tk_start() -- confirm intended */
+            size_t whitespace() const { return input_state_.whitespace(); }
+            size_t error_pos() const { return error_pos_; }
+
+            ///@}
+
+            /** @defgroup tokenizer-error-general-methods **/
+            ///@{
+
+            /** true, except for a sentinel error object **/
+            bool is_error() const { return !error_description_.empty(); }
+            /** false except for object in sentinel state **/
+            bool is_not_an_error() const { return error_description_.empty(); }
+
+            /** Print representation to stream @p os. Intended for tokenizer diagnostics.
+             *  For Schematika errors prefer @ref report
+             **/
+            void print(std::ostream & os) const;
+
+            /** Print human-oriented error report on @p os. **/
+            void report(std::ostream & os) const;
+
+            ///@}
+
+        private:
+            /** @defgroup tokenizer-error-vars **/
+            ///@{
+
+            /** source location (in tokenizer) at which error identified **/
+            char const * src_function_ = nullptr;
+            /** error description; empty in the not-an-error sentinel state **/
+            std::string error_description_;
+            /** input state associated with this error.
+             *  Sufficient to precisely locate it with context.
+             **/
+            TkInputState input_state_;
+            /** position of error, relative to token start (see ctor) **/
+            size_t error_pos_ = 0;
+
+            ///@}
+        }; /*TokenizerError*/
+
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const TokenizerError & tkerr)
+        { /* stream insertion for TokenizerError; delegates to TokenizerError::print */
+            tkerr.print(os);
+            return os;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end TokenizerError.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/buffer.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/buffer.hpp
@ -0,0 +1,328 @@
+/** @file buffer.hpp **/
+
+#pragma once
+
+#include "span.hpp"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+namespace xo {
+    namespace scm {
+        /**
+         * @class buffer buffer.hpp
+         *
+         * @brief Container for a (possibly owned) FIFO queue of chars
+         *
+         * @tparam CharT  buffer element type.  Must be trivially default-constructible
+         *                and trivially destructible: buffer manages raw storage and never
+         *                runs element constructors or destructors.
+         *
+         * @code
+         *  .buf
+         *
+         *    +------------------------------------------+
+         *    |  |  ...  |  | X|  ... | X|  |    ...  |  |
+         *    +------------------------------------------+
+         *     ^             ^            ^               ^
+         *     0             .lo          .hi             .buf_z
+         *
+         *                   <-contents-><----avail----->
+         * @endcode
+         *
+         * Buffer does not support wrapped content:
+         * content that has not been consumed always occupies contiguous memory.
+         *
+         * Example:
+         * @code
+         * // 1.
+         *   buffer<char> buf(64*1024);
+         *   buf.empty() -> true
+         *   buf.buf_z() -> 65536
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 0
+         *   buf.contents() -> empty span
+         *   buf.avail() -> span entire buffer memory
+         *
+         *   // write to (a prefix of) buf.avail()
+         *   ::strncpy(buf.buf(), "hello, world\n", 13);
+         *   buf.produce(span_type(buf.buf(), buf.buf() + 13));
+         *
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 13
+         *   buf.contents() -> "hello, world\n";
+         *
+         *
+         *   // examine stored content (does not change buffer state)
+         *   auto span = buf.contents();
+         *   cerr << string_view(span.lo(), span.size());  // "hello, world\n"
+         *
+         *   // consume (a prefix of) stored content
+         *   buf.consume(span.prefix(7));
+         *
+         *   buf.lo_pos() -> 7
+         *   buf.hi_pos() -> 13
+         *   buf.contents() -> "world\n"
+         *
+         *   // consuming all remaining content resets to original state
+         *   buf.consume(buf.contents());
+         *
+         *   buf.empty() -> true
+         *   buf.hi_pos() -> 0     // not 13!
+         *
+         * // 2.
+         *   buffer<char> buf;
+         *   buf.empty() -> true
+         *   buf.buf_z() -> 0
+         *   buf.lo_pos() -> 0
+         *   buf.hi_pos() -> 0
+         *   buf.contents() -> empty span
+         *   buf.avail() -> empty span
+         *
+         *   // allocate memory separately from ctor
+         *   buf.alloc(64*1024);
+         * @endcode
+         **/
+        template <typename CharT>
+        class buffer {
+            static_assert(std::is_trivially_default_constructible_v<CharT>
+                          && std::is_trivially_destructible_v<CharT>,
+                          "buffer<CharT> manages raw storage: CharT must be trivially constructible and destructible");
+
+        public:
+            /** @brief typealias for span of CharT **/
+            using span_type = span<CharT>;
+            /** @brief typealias for buffer size (counts CharT's, not bytes) **/
+            using size_type = std::uint64_t;
+
+        public:
+            /** @brief create empty buffer.
+
+                Does not allocate any storage;  @see alloc
+            **/
+            buffer() = default;
+            /** @brief create empty buffer,  and possibly allocate storage.
+
+                @param buf_z    Buffer size.  allocate storage (owned by this buffer) if >0.
+                @param align_z  Align to this value,  e.g. 8 to align storage on an 8-byte boundary.
+                                Must be a power of two; raised to alignof(CharT) when smaller.
+            **/
+            buffer(size_type buf_z,
+                   size_type align_z = sizeof(char))
+                : is_owner_{true},
+                  buf_{allocate_storage(buf_z, effective_align(align_z))},
+                  buf_z_{buf_z},
+                  align_z_{effective_align(align_z)},
+                  lo_pos_{0},
+                  hi_pos_{0}
+                {}
+            /** @brief buffer is not copyable **/
+            buffer(buffer const & x) = delete;
+            /** @brief move constructor.  Takes over state of @p x, leaving @p x empty and unowned **/
+            buffer(buffer && x) noexcept { this->swap(x); }
+            /** @brief destructor.  Release storage if owned **/
+            ~buffer() { this->reset(); }
+
+            /** @name Access methods **/
+            ///@{
+
+            /** @brief start of buffer memory **/
+            CharT * buf() const { return buf_; }
+            /** @brief buffer size (number of characters) **/
+            size_type buf_z() const { return buf_z_; }
+            /** @brief current start position within buffer **/
+            size_type lo_pos() const { return lo_pos_; }
+            /** @brief current end position within buffer **/
+            size_type hi_pos() const { return hi_pos_; }
+
+            ///@}
+
+            /** @brief readonly access to a single buffer element.
+
+                Relative to start of buffer (ignores current consume position).
+                No bounds checking.
+            **/
+            CharT const & operator[](size_type i) const { return buf_[i]; }
+
+            /** @brief return span for current buffer contents **/
+            span_type contents() const { return span_type(buf_ + lo_pos_,
+                                                          buf_ + hi_pos_); }
+            /** @brief returns span for writable buffer memory (unused suffix following produce position) **/
+            span_type avail() const { return span_type(buf_ + hi_pos_,
+                                                       buf_ + buf_z_); }
+
+            /** @brief @c true iff buffer is empty **/
+            bool empty() const { return lo_pos_ == hi_pos_; }
+
+
+            /**
+               @brief update buffer produce position, after (independently) writing contents of span to it
+
+               @pre left endpoint of @p span equals buffer produce position (@c .hi_pos)
+               @pre right endpoint of @p span within bounds of buffer memory range
+               @post right endpoint of @p span equals buffer produce position.
+            **/
+            void produce(span_type const & span) {
+                assert(span.lo() == buf_ + hi_pos_);
+                /* enforce documented precondition: must not run past end of buffer memory */
+                assert(span.size() <= buf_z_ - hi_pos_);
+
+                hi_pos_ += span.size();
+            }
+
+            /**
+               @brief update buffer consume position,  when done with contents of span
+
+               @pre left endpoint of @p span equals buffer consume position (@c .lo_pos)
+               @pre right endpoint of @p span does not extend beyond buffer produce position
+               @post Either
+               buffer is empty, with @c .lo_pos = @c .hi_pos = @c 0; or
+               buffer is non-empty, right endpoint of @p span equals new buffer consume position.
+            **/
+            void consume(span_type const & span) {
+                if (span.size()) {
+                    assert(span.lo() == buf_ + lo_pos_);
+                    /* cannot consume more than has been produced */
+                    assert(span.size() <= hi_pos_ - lo_pos_);
+
+                    lo_pos_ += span.size();
+                } else {
+                    /* since .consume() that arrives at empty contents also resets .lo_pos .hi_pos,
+                     * we don't want to blow up when called with an empty span -- argument
+                     * may represent some pre-reset location in buffer
+                     */
+                }
+
+                /* recycle buffer memory as soon as all content has been consumed */
+                if (lo_pos_ == hi_pos_) {
+                    lo_pos_ = 0;
+                    hi_pos_ = 0;
+                }
+            }
+
+            /**
+               @brief allocate buffer with desired amount of memory
+
+               Any existing state is discarded (owned storage is released) first.
+
+               @param buf_z     desired buffer size
+               @param align_z   alignment;  buffer memory will be aligned on this byte-boundary.
+                                Must be a power of two; raised to alignof(CharT) when smaller.
+            **/
+            void alloc(size_type buf_z, size_type align_z = sizeof(char)) {
+                /* properly reset (+ discard) any existing state */
+                this->reset();
+
+                size_type const align = effective_align(align_z);
+
+                /* if allocation throws, buffer is left in the (valid) post-reset state */
+                buf_ = allocate_storage(buf_z, align);
+                is_owner_ = true;
+                buf_z_ = buf_z;
+                align_z_ = align;
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief attach buffer to (unowned)  range of @p buf_z elements starting at @p buf[0]
+
+               Buffer is not responsible for managing storage.
+               Any existing state is discarded (owned storage is released) first.
+
+               @post
+               1. buffer is empty
+               @post
+               2. buffer read position = buffer write position = 0
+            **/
+            void setbuf(CharT * buf, size_type buf_z) {
+                /* properly reset (+ discard) any existing state */
+                this->reset();
+
+                is_owner_ = false;
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+                buf_ = buf;
+                buf_z_ = buf_z;
+            }
+
+            /**
+               @brief revert buffer to empty state and possibly zero it
+
+               Storage (owned or not) is retained.
+
+               @param zero_buffer_flag   Zero buffer contents iff this is true
+
+               @post
+               1. buffer is empty
+               @post
+               2. buffer read position = buffer write position = 0
+            **/
+            void clear2empty(bool zero_buffer_flag) {
+                if (buf_ && zero_buffer_flag)
+                    explicit_bzero(buf_, buf_z_ * sizeof(CharT));
+
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief swap representation with another buffer instance.
+            **/
+            void swap (buffer & x) noexcept {
+                std::swap(is_owner_, x.is_owner_);
+                std::swap(buf_, x.buf_);
+                std::swap(buf_z_, x.buf_z_);
+                std::swap(align_z_, x.align_z_);
+                std::swap(lo_pos_, x.lo_pos_);
+                std::swap(hi_pos_, x.hi_pos_);
+            }
+
+            /**
+               @brief reset buffer to an empty state and recover owned storage
+            **/
+            void reset() {
+                if (is_owner_ && buf_) {
+                    /* storage came from aligned operator new[];
+                     * must be released with the matching aligned operator delete[]
+                     */
+                    ::operator delete[](const_cast<std::remove_const_t<CharT> *>(buf_),
+                                        std::align_val_t(align_z_));
+                }
+
+                is_owner_ = false;
+                buf_ = nullptr;
+                buf_z_ = 0;
+                align_z_ = alignof(CharT);
+                lo_pos_ = 0;
+                hi_pos_ = 0;
+            }
+
+            /**
+               @brief move-assignment operator.
+               @param x   right-hand-side to move from.
+
+               Storage previously owned by this buffer is released.
+               Self-assignment is a no-op.
+
+               @post
+               @p x is in a valid, empty, unowned state
+            **/
+            buffer & operator= (buffer && x) noexcept {
+                if (this != &x) {
+                    /* release whatever we currently own, then trade our (now empty) state with x */
+                    this->reset();
+                    this->swap(x);
+                }
+
+                return *this;
+            }
+
+            /** @brief buffer is not copy-assignable */
+            buffer & operator= (buffer const & x) = delete;
+
+        private:
+            /** @brief alignment actually used for allocation: never less than alignof(CharT) **/
+            static constexpr size_type effective_align(size_type align_z) {
+                return (align_z < alignof(CharT)) ? size_type(alignof(CharT)) : align_z;
+            }
+
+            /** @brief obtain raw, uninitialized storage for @p buf_z elements aligned on @p align_z bytes.
+                Returns nullptr when @p buf_z is zero.
+            **/
+            static CharT * allocate_storage(size_type buf_z, size_type align_z) {
+                if (buf_z == 0)
+                    return nullptr;
+
+                void * mem = ::operator new[](buf_z * sizeof(CharT),
+                                              std::align_val_t(align_z));
+
+                return static_cast<CharT *>(mem);
+            }
+
+        private:
+            /** @brief true iff buffer is responsible for freeing storage at @c buf_ **/
+            bool is_owner_ = false;
+            /** @brief buffer contents.  buffer memory comprises @c buf_[0] up to (not including) @c buf_[buf_z_] **/
+            CharT * buf_ = nullptr;
+            /** @brief buffer size (in units of CharT) **/
+            size_type buf_z_ = 0;
+            /** @brief alignment (in bytes) used when allocating owned storage; needed to release it **/
+            size_type align_z_ = alignof(CharT);
+
+            /** @brief buffer read (consume) position
+
+                @invariant
+                0 <= lo_pos_ <= hi_pos_ <= buf_z_
+            **/
+            size_type lo_pos_ = 0;
+            /** @brief buffer write (produce) position
+
+                @invariant
+                0 <= lo_pos_ <= hi_pos_ <= buf_z_
+            **/
+            size_type hi_pos_ = 0;
+        };
+
+        /** @brief Non-member @c swap overload for @c buffer<CharT>.
+         *
+         *  Forwards to the member function @c buffer::swap,
+         *  exchanging the representations of @p lhs and @p rhs.
+         **/
+        template <typename CharT>
+        inline void
+        swap(buffer<CharT> & lhs, buffer<CharT> & rhs)
+        {
+            lhs.swap(rhs);
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end buffer.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp
@ -0,0 +1,81 @@
+/* file scan_result.hpp
+ *
+ * author: Roland Conybeare, Jun 2025
+ */
+
+#pragma once
+
+#include "Token.hpp"
+#include "TokenizerError.hpp"
+#include "TkInputState.hpp"
+
+namespace xo {
+    namespace scm {
+        /** @class scan_result
+         *  @brief Outcome of attempting to scan one token from input.
+         *
+         * @code
+         *  Outcomes are classified by two flags
+         *  (T: @c token_.is_valid(), E: @c error_.is_error())
+         *
+         *  | T     | E     | description                         |
+         *  |-------+-------+-------------------------------------|
+         *  | false | false | end of input, including end of line |
+         *  | true  | false | parsed token in T                   |
+         *  | false | true  | parse error in E                    |
+         *
+         * @endcode
+         **/
+        class scan_result {
+        public:
+            using CharT = char;
+            using token_type = Token;
+            using span_type = span<const CharT>;
+            using error_type = TokenizerError;
+            using input_state_type = TkInputState;
+
+        public:
+            /** Assemble a result from its parts.
+             *  @p token     scanned token (may be invalid)
+             *  @p consumed  input span associated with @p token
+             *  @p error     error details; defaults to the "no error" sentinel
+             **/
+            scan_result(const token_type & token,
+                        const span_type & consumed,
+                        const error_type & error = error_type())
+                : token_{token},
+                  consumed_{consumed},
+                  error_{error}
+            {}
+
+            static scan_result make_whitespace(const span_type & prefix_input);
+            static scan_result make_partial(const span_type & prefix_input);
+            /**
+             *  @p error_src can be __FUNCTION__ from site where error generated.
+             *  @p error_msg error message
+             *  @p error_pos error position, relative to start of token
+             *  @p input_state_ref input state object;
+             *  copied into scan_result, and leaving input_state_ref.current_line cleared
+             **/
+            static scan_result make_error_consume_current_line(const char * error_src,
+                                                               std::string error_msg,
+                                                               size_t error_pos,
+                                                               input_state_type & input_state_ref);
+
+            /** neither a token nor an error: first row of the table above **/
+            bool is_eof_or_ambiguous() const {
+                if (!token_.is_invalid())
+                    return false;
+
+                return error_.is_not_an_error();
+            }
+            /** a token was scanned **/
+            bool is_token() const { return token_.is_valid(); }
+            /** scanning failed; details available from @ref error **/
+            bool is_error() const { return error_.is_error(); }
+
+            const token_type & get_token() const { return token_; }
+            const span_type & consumed() const { return consumed_; }
+            const error_type & error() const { return error_; }
+
+        public:
+            /** Successfully parsed token, whenever tk_type != tokentype::tk_invalid.
+             *  Will be tokentype::tk_invalid in the normal course of events for valid input,
+             *  when consuming whitespace
+             **/
+            token_type token_;
+            /** input span represented by .token, on success. Otherwise not defined **/
+            span_type consumed_;
+            /** error description, whenever .error_.is_error() is true **/
+            error_type error_;
+        };
+
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end scan_result.hpp */
--- a/xo-tokenizer2/include/xo/tokenizer2/span.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/span.hpp
@ -0,0 +1,291 @@
+/** @file span.hpp **/
+
+#pragma once
+
+#include "xo/indentlog/scope.hpp"
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <ostream>
+#include <cstdint>
+#include <cassert>
+
+namespace xo {
+    namespace scm {
+        /** @class span span.hpp
+         *
+         *  @brief A contiguous range of characters,  without ownership.
+         *
+         *  A span is a pair of pointers [lo, hi).  It never allocates or frees memory;
+         *  whoever owns the underlying storage must keep it alive while the span is in use.
+         *
+         *  @tparam CharT type for elements referred to by this span.
+         **/
+        template <typename CharT>
+        class span {
+        public:
+            /** @defgroup span-type-traits span type traits **/
+            ///@{
+
+            /** typealias for span size (in units of CharT) **/
+            using size_type = std::uint64_t;
+
+            ///@}
+
+        public:
+            /** @defgroup span-ctors span constructors **/
+            ///@{
+
+            /** null span **/
+            span() : lo_{nullptr}, hi_{nullptr} {}
+
+            /** Create span for the contiguous memory range [@p lo, @p hi) **/
+            span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}
+
+            /** implicit conversion from span<CharU>, whenever CharU* converts to CharT*
+             *  (for example span<char> -> span<const char>)
+             **/
+            template<typename CharU>
+            span(const span<CharU> & other,
+                 std::enable_if_t<std::is_convertible_v<CharU*, CharT*>
+                 && !std::is_same_v<CharU, CharT>> * = nullptr)
+                : lo_{other.lo()}, hi_{other.hi()} {}
+
+            /** copy ctor (explicit to avoid ambiguity with template ctor) **/
+            span(const span & other) = default;
+            span & operator=(const span & other) = default;
+
+            /** Create a null span (i.e. with null @p lo, @p hi pointers)
+             *  A null span can be concatenated with any other span
+             *  without triggering matching-endpoint asserts.
+             **/
+            static span make_null() { return span(static_cast<CharT*>(nullptr), static_cast<CharT*>(nullptr)); }
+
+            /** @brief create span for C-style string @p cstr
+             *
+             *  Span excludes the terminating null.  A null @p cstr yields a null span.
+             *  Only usable when CharT is a const-qualified char type:
+             *  the endpoints are taken directly from the const pointer @p cstr.
+             **/
+            static span from_cstr(const CharT * cstr) {
+                CharT * lo = cstr;
+                CharT * hi = cstr ? cstr + strlen(cstr) : nullptr;
+
+                return span(lo, hi);
+            }
+
+            /** @brief create span from std::string @p str
+             *
+             *  Span refers to storage owned by @p str; it is valid only as long as
+             *  @p str is alive and unmodified.
+             **/
+            static span from_string(const std::string& str) {
+                /* use data()/size(): dereferencing str.end() is not permitted */
+                CharT * lo = str.data();
+                CharT * hi = str.data() + str.size();
+
+                return span(lo, hi);
+            }
+
+            /** @brief concatenate two contiguous spans
+             *
+             *  If either argument is null, returns the other.
+             *  Otherwise requires @c span1.hi() == @c span2.lo() (checked by assert,
+             *  after logging both endpoints on mismatch).
+             */
+            static span concat(const span & span1, const span & span2) {
+                if (span1.is_null())
+                    return span2;
+                if (span2.is_null())
+                    return span1;
+
+                if (span1.hi() != span2.lo()) {
+                    scope log(XO_DEBUG(true));
+
+                    log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
+                }
+
+                assert(span1.hi() == span2.lo());
+
+                CharT * lo = span1.lo();
+                CharT * hi = span2.hi();
+
+                return span(lo, hi);
+            }
+
+            ///@}
+
+            /** @defgroup span-access-methods **/
+            ///@{
+
+            CharT * lo() const { return lo_; } /* get member span::lo_ */
+            CharT * hi() const { return hi_; } /* get member span::hi_ */
+
+            ///@}
+
+            /** @defgroup span-general-methods **/
+            ///@{
+
+            /** @brief strip prefix until first occurrence of '\n', including the newline.
+             *
+             *  If span contains no newline, it becomes empty (@c lo() == @c hi()).
+             **/
+            void discard_until_newline() {
+                /* iterate with CharT* (not const CharT*) so that the result
+                 * can be assigned to lo_ for both const and non-const CharT
+                 */
+                for (CharT * p = lo_; p < hi_; ++p) {
+                    if (*p == '\n') {
+                        lo_ = p + 1;
+                        return;
+                    }
+                }
+
+                lo_ = hi_;
+            }
+
+            /** Create new span over supplied type,
+             *  with identical (possibly misaligned) endpoints.
+             *
+             *  @warning
+             *  1. New span uses exactly the same memory addresses.
+             *     Endpoint pointers may not be aligned.
+             *  2. Implementation assumes code compiled with
+             *     @code -fno-strict-aliasing @endcode enabled.
+             *
+             *  @tparam OtherT element type for new span
+             **/
+            template <typename OtherT>
+            span<OtherT>
+            cast() const { return span<OtherT>(reinterpret_cast<OtherT *>(lo_),
+                                               reinterpret_cast<OtherT *>(hi_)); }
+
+            /** @brief create span including the first @p z members of this span.
+             *
+             *  @p z is clamped to @c size(), so the result never extends beyond this span.
+             **/
+            span prefix(size_type z) const {
+                if (z > size())
+                    z = size();
+
+                return span(lo_, lo_ + z);
+            }
+
+            /** @brief create span representing prefix up to (but not including) @p *p
+             *
+             *  @p p is clamped to @c [lo_,hi_]: positions before @c lo_ give an empty
+             *  span at @c lo_; positions after @c hi_ give the whole span.
+             **/
+            span prefix_upto(CharT * p) const {
+                if (p < lo_)
+                    return span(lo_, lo_);
+                else if (p <= hi_)
+                    return span(lo_, p);
+                else
+                    return span(lo_, hi_);
+            }
+
+            /** @brief create span with first @p z members of this span removed
+             *
+             *  @p z is clamped to @c size(); result is empty when @p z >= @c size().
+             **/
+            span after_prefix(size_type z) const {
+                /* compare sizes rather than pointers: lo_ + z may not be a valid pointer */
+                if (z > size())
+                    z = size();
+
+                return span(lo_ + z, hi_);
+            }
+
+            /** @brief create span with @p prefix of this span removed
+             *
+             *  @throws std::runtime_error if @p prefix is non-null and does not start at @c lo()
+             **/
+            span after_prefix(const span & prefix) const {
+                if (!prefix.is_null() && (prefix.lo() != lo_)) {
+                    throw std::runtime_error
+                        ("after_prefix: expected prefix of this span");
+                }
+
+                return after_prefix(prefix.size());
+            }
+
+            /** Create span starting with position @p p.
+             *  Does boundary checking; will return empty span if @p p is outside @c [lo_,hi_]
+             **/
+            span suffix_from(CharT * p) const {
+                if ((lo_ <= p) && (p <= hi_))
+                    return span(p, hi_);
+                else
+                    return span(hi_, hi_);
+            }
+
+            /** true iff this span is null.  distinct from empty. **/
+            bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
+            /** true iff this span is empty (comprises 0 elements). **/
+            bool empty() const { return lo_ == hi_; }
+            /** report the number of elements (of type CharT) in this span. **/
+            size_type size() const { return hi_ - lo_; }
+
+            /** increase extent of this span to include @p x.
+             *
+             *  Follows the same null-span rule as @ref concat:
+             *  - if @p x is null, this span is unchanged
+             *  - if this span is null, it becomes a copy of @p x
+             *  - otherwise requires @c hi() == @c x.lo() (checked by assert;
+             *    span is left unchanged on mismatch when asserts are disabled)
+             **/
+            span & operator+=(const span & x) {
+                if (x.is_null())
+                    return *this;
+
+                if (this->is_null()) {
+                    *this = x;
+                    return *this;
+                }
+
+                assert(hi_ == x.lo_);
+
+                if (hi_ == x.lo_)
+                    hi_ = x.hi_;
+
+                return *this;
+            }
+
+            /** print representation for this span on stream @p os **/
+            void print(std::ostream & os) const {
+                /* (pointer, length) string_view ctor: available in C++17 as well as C++20 */
+                os << "<span"
+                   << xtag("addr", (void*)lo_)
+                   << xtag("size", size())
+                   << " :text " << xo::print::quot(std::string_view(lo_, size()))
+                   << ">";
+            }
+            ///@}
+
+        private:
+            /** @defgroup span-instance-vars **/
+            ///@{
+
+            /** start of span.
+                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
+            **/
+            CharT * lo_ = nullptr;
+
+            /** @brief end of span.
+                Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
+            **/
+            CharT * hi_ = nullptr;
+
+            ///@}
+        }; /*span*/
+
+        /** @defgroup span-operators **/
+        ///@{
+
+        /** Equality for spans.
+         *  Spans compare equal exactly when their lo endpoints are the same address
+         *  and their hi endpoints are the same address (contents are not inspected).
+         **/
+        template <typename CharT>
+        inline bool
+        operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
+            if (lhs.lo() != rhs.lo())
+                return false;
+
+            return lhs.hi() == rhs.hi();
+        }
+
+        /** Inequality for spans.
+         *  Spans differ when the lo endpoints differ or the hi endpoints differ.
+         **/
+        template <typename CharT>
+        inline bool
+        operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
+            const bool same_lo = (lhs.lo() == rhs.lo());
+            const bool same_hi = (lhs.hi() == rhs.hi());
+
+            return !(same_lo && same_hi);
+        }
+
+        /** Stream inserter for spans: writes the diagnostic summary
+         *  produced by span::print for @p x to @p os.
+         **/
+        template <typename CharT>
+        inline std::ostream &
+        operator<<(std::ostream & os, const span<CharT> & x)
+        {
+            std::ostream & out = os;
+
+            x.print(out);
+
+            return out;
+        }
+
+        ///@}
+    } /*namespace scm*/
+
+    namespace print {
+        /** Wrapper holding a copy of a span, used to select the
+         *  raw-text stream inserter for printspan_impl.
+         **/
+        template <typename CharT>
+        class printspan_impl {
+        public:
+            printspan_impl(xo::scm::span<CharT> x)
+                : span_{x}
+            {}
+
+            /** the span to print **/
+            xo::scm::span<CharT> span_;
+        };
+
+        /** Wrap @p span in a printspan_impl, so that inserting the result
+         *  on a stream writes the span's elements.
+         **/
+        template <typename CharT>
+        printspan_impl<CharT>
+        printspan(const xo::scm::span<CharT>& span)
+        {
+            printspan_impl<CharT> wrapped(span);
+
+            return wrapped;
+        }
+
+        /** Write each element of the wrapped span to @p os, in order from lo to hi **/
+        template <typename CharT>
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const printspan_impl<CharT> & x)
+        {
+            const CharT * cursor = x.span_.lo();
+            const CharT * const limit = x.span_.hi();
+
+            while (cursor < limit) {
+                os << *cursor;
+                ++cursor;
+            }
+
+            return os;
+        }
+
+        /* Instantiate PPDETAIL_ATOMIC_BODY for printspan_impl<CharT> and span<CharT>,
+         * unless a macro named ppdetail_atomic is defined.
+         *
+         * NOTE(review): PPDETAIL_ATOMIC_BODY presumably comes from
+         * xo/indentlog/print/ppdetail_atomic.hpp -- confirm.
+         * NOTE(review): the trailing backslashes below merely splice each
+         * `template <...>` line onto the macro invocation that follows; they look
+         * like leftovers from a macro body -- confirm they can be dropped.
+         */
+#ifndef ppdetail_atomic
+        template <typename CharT>        \
+        PPDETAIL_ATOMIC_BODY(printspan_impl<CharT>);
+
+        template <typename CharT>        \
+        PPDETAIL_ATOMIC_BODY(xo::scm::span<CharT>);
+#endif
+
+    }
+} /*namespace xo*/
--- a/xo-tokenizer2/include/xo/tokenizer2/tokentype.hpp
+++ b/xo-tokenizer2/include/xo/tokenizer2/tokentype.hpp
@ -0,0 +1,192 @@
+/** @file tokentype.hpp
+ *
+ *  author: Roland Conybeare, Jul 2024
+ **/
+
+#pragma once
+
+#include "xo/indentlog/print/tag.hpp" // for STRINGIFY
+#include "xo/indentlog/print/ppdetail_atomic.hpp"
+#include <ostream>
+
+namespace xo {
+    namespace scm {
+        /** @enum tokentype
+         *  Enum to identify different Schematika input token types
+         *
+         *  Enumerators after @c tk_invalid (-1) are numbered consecutively from 0,
+         *  so the final enumerator @c n_tokentype equals the number of valid token types.
+         *
+         *  Schematika code examples:
+         *
+         *  @code
+         *    type point :: { xcoord : f64, ycoord : f64 };
+         *    type matrix :: array<double, 2>;  // 2-d array
+         *
+         *    decl hypot(x : f64, y : f64) -> f64;
+         *
+         *    def hypot(x : f64, y : f64) {
+         *      let
+         *        x2 = (x * x);
+         *        y2 = (y * y);
+         *        hypot2 = (x2 + y2);
+         *      in
+         *        sqrt(hypot2);
+         *    };
+         *
+         *    def someconst 4;
+         *
+         *    def foo(v : vec<i32>) {
+         *      def (pi : f64) = 3.1415926;
+         *      def (h : (f64,f64) -> f64) = hypot;
+         *
+         *      h = hypot3;
+         *    };
+         *
+         *    def matrixproduct(x : matrix, y : matrix) {
+         *      [i, j : x.row(i) * y.col(j)];
+         *    };
+         *  @endcode
+         **/
+        enum class tokentype {
+            /** sentinel value **/
+            tk_invalid = -1,
+
+            /** a boolean constant **/
+            tk_bool,
+
+            /** an integer constant (signed 64-bit integer) **/
+            tk_i64,
+
+            /** a 64-bit floating-point constant **/
+            tk_f64,
+
+            /** a string literal **/
+            tk_string,
+
+            /** a symbol **/
+            tk_symbol,
+
+            /** left-hand parenthesis @c '(' **/
+            tk_leftparen,
+
+            /** right-hand parenthesis @c ')' **/
+            tk_rightparen,
+
+            /** left-hand bracket @c '[' **/
+            tk_leftbracket,
+
+            /** right-hand bracket @c ']' **/
+            tk_rightbracket,
+
+            /** left-hand brace @c '{' **/
+            tk_leftbrace,
+
+            /** right-hand brace @c '}' **/
+            tk_rightbrace,
+
+            /** left-hand angle bracket @c '<' **/
+            tk_leftangle,
+
+            /** right-hand angle bracket @c '>' **/
+            tk_rightangle,
+
+            /** less-equal @c '<=' **/
+            tk_lessequal,
+
+            /** greater-equal @c '>=' **/
+            tk_greatequal,
+
+            /** dot @c '.' **/
+            tk_dot,
+
+            /** comma @c ',' **/
+            tk_comma,
+
+            /** colon @c ':' **/
+            tk_colon,
+
+            /** double-colon @c '::' **/
+            tk_doublecolon,
+
+            /** semi-colon @c ';' **/
+            tk_semicolon,
+
+            /** single equals sign @c '=' **/
+            tk_singleassign,
+
+            /** assignment @c ':=' **/
+            tk_assign,
+
+            /** indirection @c '->' **/
+            tk_yields,
+
+            /** note: operators are not treated as punctuation.
+             *  'do-always' is a legal variable name,
+             *  as is 'maybe*2', 'maybe+1', 'path/to/foo'
+             **/
+
+            /** operator @c '+' **/
+            tk_plus,
+            /** operator @c '-' **/
+            tk_minus,
+            /** operator @c '*' **/
+            tk_star,
+            /** operator @c '/' **/
+            tk_slash,
+
+            /** operator @c '==' **/
+            tk_cmpeq,
+            /** operator @c '!=' **/
+            tk_cmpne,
+
+            /** keyword @c 'type' **/
+            tk_type,
+
+            /** keyword @c 'def' **/
+            tk_def,
+
+            /** keyword @c 'lambda' **/
+            tk_lambda,
+
+            /** keyword @c 'if' **/
+            tk_if,
+
+            /** keyword @c 'then' **/
+            tk_then,
+
+            /** keyword @c 'else' **/
+            tk_else,
+
+            /** keyword @c 'let' **/
+            tk_let,
+
+            /** keyword @c 'in' **/
+            tk_in,
+
+            /** keyword @c 'end' **/
+            tk_end,
+
+            /** counts number of entries (not itself a token type) **/
+            n_tokentype
+        }; /*tokentype*/
+
+        /** String representation for enum value.
+         *  For example @c tokentype_descr(tokentype::tk_if) -> @c "if"
+         **/
+        extern char const *
+        tokentype_descr(tokentype tk_type);
+
+        /** Stream inserter for @ref tokentype:
+         *  writes the string given by @ref tokentype_descr for @p tk_type on @p os
+         **/
+        inline std::ostream &
+        operator<< (std::ostream & os, tokentype tk_type)
+        {
+            char const * const descr = tokentype_descr(tk_type);
+
+            return os << descr;
+        }
+    } /*namespace scm*/
+
+#ifndef ppdetail_atomic
+    namespace print {
+        PPDETAIL_ATOMIC(xo::scm::tokentype);
+    } /*namespace print*/
+#endif
+} /*namespace xo*/
+
+/* end tokentype.hpp */