xo-tokenizer: bugfix: yields token works + 2phase utest

2025-06-22 16:16:23 -05:00 · 2025-06-22 16:16:23 -05:00 · 27ef5701ac
commit 27ef5701ac
parent 8435734b45
8 changed files with 722 additions and 312 deletions
--- a/xo-reader/src/reader/reader.cpp
+++ b/xo-reader/src/reader/reader.cpp
@ -32,10 +32,10 @@ namespace xo {
            while (!input.empty()) {
                /* read one token from input */
                auto sr = this->tokenizer_.scan2(input, eof);
-                const auto & tk = sr.first;
-                const span_type & used_span = sr.second;
+                const auto & tk = sr.get_token();
+                const span_type & used_span = sr.consumed();

-                log && log(xtag("used_span", used_span));
+                log && log(xtag("consumed", used_span));
                log && log(xtag("input.pre", input));

                input = input.after_prefix(used_span);
--- a/xo-tokenizer/CMakeLists.txt
+++ b/xo-tokenizer/CMakeLists.txt
@ -19,9 +19,11 @@ add_definitions(${PROJECT_CXX_FLAGS})
 # ----------------------------------------------------------------

 add_subdirectory(src/tokenizer)
+add_subdirectory(example)
 add_subdirectory(utest)
+xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)

 # ----------------------------------------------------------------
-# provide find_package() support
-
-xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
+# docs targets depend on all the other library/utest targets
+#
+add_subdirectory(docs)
--- a/xo-tokenizer/include/xo/tokenizer/span.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/span.hpp
@ -2,6 +2,7 @@

 #pragma once

+#include "xo/indentlog/scope.hpp"
 #include <ostream>
 #include <cstdint>
 #include <cassert>
@ -24,6 +25,9 @@ namespace xo {
            /** @brief create span for the contiguous memory range [@p lo, @p hi) **/
            span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}

+            /** @brief create a null span (i.e. with null @p lo, @p hi pointers) **/
+            static span make_null() { return span(nullptr, nullptr); }
+
            /** @brief create span for C-style string @p cstr **/
            static span from_cstr(const CharT * cstr) {
                CharT * lo = cstr;
@ -32,6 +36,35 @@ namespace xo {
                return span(lo, hi);
            }

+            /** @brief create span over the characters of @p str (valid only while @p str is unmodified) **/
+            static span from_string(const std::string& str) {
+                CharT * lo = str.data();               /* data()/size(): never dereferences end() */
+                CharT * hi = str.data() + str.size();  /* one past last char; equals lo when empty */
+
+                return span(lo, hi);
+            }
+
+            /** @brief concatenate two contiguous spans; a null span acts as the identity */
+            static span concat(const span & span1, const span & span2) {
+                if (span1.is_null())
+                    return span2;
+                if (span2.is_null())
+                    return span1;
+                /* both non-null: expect span2 to begin exactly where span1 ends */
+                if (span1.hi() != span2.lo()) {
+                    scope log(XO_DEBUG(true));
+
+                    log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
+                }
+                /* contiguity is a precondition; enforced only where assert() is compiled in */
+                assert(span1.hi() == span2.lo());
+
+                CharT * lo = span1.lo();
+                CharT * hi = span2.hi();
+
+                return span(lo, hi);
+            }
+
            ///@{

            /** @name getters **/
@ -96,6 +129,8 @@ namespace xo {
                    return span(hi_, hi_);
            }

+            /** @brief true iff this span is null.  distinct from empty. **/
+            bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
            /** @brief true iff this span is empty (comprises 0 elements). **/
            bool empty() const { return lo_ == hi_; }
            /** @brief report the number of elements (of type CharT) in this span. **/
--- a/xo-tokenizer/include/xo/tokenizer/token.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/token.hpp
@ -80,7 +80,10 @@ namespace xo {
            static token assign_token() { return token(tokentype::tk_assign); }
            static token yields() { return token(tokentype::tk_yields); }

+            static token plus_token() { return token(tokentype::tk_plus); }
+            static token minus_token() { return token(tokentype::tk_minus); }
            static token star_token() { return token(tokentype::tk_star); }
+            static token slash_token() { return token(tokentype::tk_slash); }

            static token type() { return token(tokentype::tk_type); }
            static token def() { return token(tokentype::tk_def); }
@ -355,5 +358,4 @@ namespace xo {
    } /*Namespace scm*/
 } /*namespace xo*/

-
 /* end token.hpp */
--- a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
@ -7,6 +7,7 @@

 #include "token.hpp"
 #include "span.hpp"
+#include "scan_result.hpp"
 #include "xo/indentlog/scope.hpp"
 #include <cassert>

@ -21,7 +22,7 @@ namespace xo {
         *    tokenizer_type tkz;
         *    span_type input = ...;
         *
-         *    while !input.empty() {
+         *    while (!input.empty()) {
         *        auto res = tkz.scan(input);
         *        const auto & tk = res.first;
         *
@ -39,22 +40,27 @@ namespace xo {
         *    // expect !tkz.has_prefix()
         *
         *  @endcode
+         *
+         * See tokentype.hpp for token types
         **/
        template <typename CharT>
        class tokenizer {
        public:
            using token_type = token<CharT>;
            using span_type = span<const CharT>;
-            using scan_result = std::pair<token_type, span_type>;
+            using result_type = scan_result<CharT>;

        public:
-            tokenizer() = default;
+            tokenizer(bool debug_flag = false);
+
+            /** recognize the newline character '\n' **/
+            bool is_newline(CharT ch) const;

            /** identifies whitespace chars.
             *  These are chars that do not belong to any token.
             *  They are not permitted to appear within
             *  a symbol or string token.
-             *  Appearance of a whitespace char forces completion of
+             *  Appearance of a whitespace char forces completion of
             *  preceding token.
             **/
            bool is_whitespace(CharT ch) const;
@ -77,28 +83,59 @@ namespace xo {
             **/
            bool has_prefix() const { return !prefix_.empty(); }

-            /** assemble token from text @p token_text
+            /** assemble token from text @p token_text.
+             *  @p token_text will often but not always represent a subset of @p input.
+             *  (For example consider multi-line string literals)
+             *  Also the span @p token_text may (in uncommon cases)
+             *  have been copied to separate storage from @p input
+             *
+             *  @p initial_whitespace   Amount of whitespace input being consumed from input.
+             *  @p initial_token_prefix_from_input  Amount of non-whitespace input being
+             *  consumed from input. Not counting any stashed-and-already-consumed input
+             *
+             *  retval.consumed will represent some possibly-empty prefix of @p input
             **/
-            token_type assemble_token(const span_type & token_text) const;
+            result_type assemble_token(std::size_t initial_whitespace,
+                                       std::size_t initial_token_prefix_from_input,
+                                       const span_type & token_text,
+                                       const span_type & input) const;
+
+            /** degenerate version of assemble_token() on reaching end-of-file **/
+            result_type assemble_final_token(const span_type & token_text) const;

            /** scan for next input token,  given @p input.
-             *  Note tokenizer can consume input (e.g. whitespace)
-             *  without completing a token
+             *  Note:
+             *  - tokenizer can consume input (e.g. whitespace)
+             *    without completing a token
+             *  - tokenizer will remember the extent of the last line of input
+             *    for which parsing has begun, but not completed.
+             *    It's required that at least that portion of the input span
+             *    remain valid across scan(), scan2() calls
             *
             *  @return {parsed token, consumed span}
             **/
-            scan_result scan(const span_type & input);
+            result_type scan(const span_type & input);

            /** When eof is false, same as scan(input).
             *  When eof is true and scan(input) does not report a token,
             *  return notify_eof()
             **/
-            scan_result scan2(const span_type & input, bool eof);
+            result_type scan2(const span_type & input, bool eof);

-            /** notify end of input,  resolve any stored input **/
-            token_type notify_eof();
+            /** notify end of input,  resolving any ambiguous input stashed in .prefix
+             **/
+            result_type notify_eof(const span_type & input);

        private:
+            result_type scan_completion(const span_type & whitespace,
+                                        const CharT* token_end,
+                                        const span_type & input);
+
+        private:
+            /** true to log tokenizer activity to stdout **/
+            bool debug_flag_ = false;
+            /** remember extent of current line here (updated as scan() skips newlines) **/
+            span_type current_line_ = span_type::make_null();
            /** Accumulate partial token here.
             *  This will happen if input sent to @ref tokenizer::scan
             *  ends without a determinate token boundary.
@ -106,6 +143,17 @@ namespace xo {
            std::string prefix_;
        }; /*tokenizer*/

+        template <typename CharT>
+        tokenizer<CharT>::tokenizer(bool debug_flag)
+            : debug_flag_{debug_flag}
+        {}
+
+        template <typename CharT>
+        bool
+        tokenizer<CharT>::is_newline(CharT ch) const {
+            return (ch == '\n');
+        }
+
        template <typename CharT>
        bool
        tokenizer<CharT>::is_whitespace(CharT ch) const {
@ -126,7 +174,10 @@ namespace xo {
            case '<':
                return true;
            case '>':
-                return true;
+                /* can't be punctuation
+                 * - appears in tk_yields token: ->
+                 */
+                return false;
            case '(':
                return true;
            case ')':
@ -149,7 +200,10 @@ namespace xo {
            case '=':
                return true;
            case '-':
-                /* can't be punctuation -- can appear inside f64 token: e.g. 1.23e-9 */
+                /* can't be punctuation
+                 * - can appear inside f64 token: e.g. 1.23e-9.
+                 * - begins tk_yields token: ->
+                 */
                return false;
            case '+':
                /* can't be punctuation -- can appear inside f64 token: e.g. 1.23e+4 */
@ -171,6 +225,10 @@ namespace xo {
        template <typename CharT>
        bool
        tokenizer<CharT>::is_2char_punctuation(CharT ch) const {
+            /* can't put '-' here, because of the way it appears in numeric literals.
+             * Characters listed here may not appear in symbol names.
+             */
+
            switch(ch) {
            case ':':
                /* can begin := */
@ -182,15 +240,19 @@ namespace xo {

        template <typename CharT>
        auto
-        tokenizer<CharT>::assemble_token(const span_type & token_text) const -> token_type
+        tokenizer<CharT>::assemble_token(std::size_t initial_whitespace,
+                                         std::size_t initial_token_prefix_from_input,
+                                         const span_type & token_text,
+                                         const span_type & input) const -> result_type
        {
-            constexpr bool c_debug_flag = true;
-
            /* literal|pretty|streamlined */
            log_config::style = function_style::streamlined;

-            scope log(XO_DEBUG(c_debug_flag));
-            log && log(xtag("token_text", token_text));
+            scope log(XO_DEBUG(debug_flag_));
+            log && log(xtag("token_text", token_text),
+                       xtag("initial_whitespace", initial_whitespace),
+                       xtag("initial_token_prefix_from_input", initial_token_prefix_from_input),
+                       xtag("input", input));

            tokentype tk_type = tokentype::tk_invalid;
            std::string tk_text;
@ -265,79 +327,89 @@ namespace xo {
                /* true if at least one digit encountered */
                bool number_flag = false;

-                /* token will be one of: {i64, f64, dot}: */
-                for(; ix != token_text.hi(); ++ix) {
-                    if((*ix == '-') || (*ix == '+')) {
-                        /* sign allowed:
-                         * 1. before period and before first digit
-                         * 2. after exponent
-                         */
-                        if (!period_flag && !number_flag && !sign_flag) {
-                            sign_flag = true;
-                        } else if (exponent_flag && !exponent_digit_flag) {
-                            exponent_sign_flag = true;
-                        } else {
-                            throw std::runtime_error
-                                (tostr("tokenizer::assemble_token",
-                                       ": improperly placed sign indicator",
-                                       xtag("pos", ix - tk_start),
-                                       xtag("char", *ix)));
-                        }
-                    } else if(*ix == '.') {
-                        if (period_flag) {
-                            throw (std::runtime_error
-                                   (tostr("tokenizer::assemble_token",
-                                          ": duplicate decimal point",
-                                          xtag("pos", ix - tk_start),
-                                          xtag("char", *ix))));
-                        }
+                log && log(xtag("*ix", *ix),
+                           xtag("tk.length", token_text.size()));
+                if (log && (ix + 1 < tk_end))
+                    log(xtag("*(ix+1)", *(ix + 1)));

-                        period_flag = true;
-                    } else if((*ix == 'e') || (*ix == 'E')) {
-                        if (exponent_flag) {
-                            throw (std::runtime_error
-                                   (tostr("tokenizer::assemble_token",
-                                          ": duplicate exponent marker",
-                                          xtag("pos", ix - tk_start),
-                                          xtag("char", *ix))));
-                        }
-
-                        exponent_flag = true;
-                    } else if(isdigit(*ix)) {
-                        if (exponent_flag) {
-                            /* need digit before exponent to recognize as number */
-                            exponent_digit_flag = true;
-                        } else {
-                            number_flag = true;
-                        }
-                    } else {
-                        /* invalid input */
-                        throw (std::runtime_error
-                               (tostr("tokenizer::assemble_token",
-                                      ": unexpected character in numeric constant",
-                                      xtag("pos", ix - tk_start),
-                                      xtag("char", *ix))));
-                    }
-                }
-
-                if (number_flag) {
-                    if (period_flag || exponent_flag) {
-                        tk_type = tokentype::tk_f64;
-                    } else {
-                        tk_type = tokentype::tk_i64;
-                    }
-                } else if (period_flag && !exponent_flag) {
-                    tk_type = tokentype::tk_dot;
+                if ((*ix == '-') && (ix + 2 == token_text.hi()) && (*(ix + 1) == '>')) {
+                    /* composing exactly '->' */
+                    tk_type = tokentype::tk_yields;
                } else {
-                    /* not a valid token */
-                }
+                    /* token (if valid) will be one of: {tk_i64, tk_f64, tk_dot}: */
+                    for (; ix != token_text.hi(); ++ix) {
+                        if ((*ix == '-') || (*ix == '+')) {
+                            /* sign allowed:
+                             * 1. before period and before first digit
+                             * 2. after exponent
+                             */
+                            if (!period_flag && !number_flag && !sign_flag) {
+                                sign_flag = true;
+                            } else if (exponent_flag && !exponent_digit_flag) {
+                                exponent_sign_flag = true;
+                            } else {
+                                throw std::runtime_error
+                                    (tostr("tokenizer::assemble_token",
+                                           ": improperly placed sign indicator",
+                                           xtag("pos", ix - tk_start),
+                                           xtag("char", *ix)));
+                            }
+                        } else if (*ix == '.') {
+                            if (period_flag) {
+                                throw (std::runtime_error
+                                       (tostr("tokenizer::assemble_token",
+                                              ": duplicate decimal point",
+                                              xtag("pos", ix - tk_start),
+                                              xtag("char", *ix))));
+                            }

-                log && log(xtag("sign_flag", sign_flag));
-                log && log(xtag("period_flag", period_flag),
-                           xtag("exponent_flag", exponent_flag),
-                           xtag("exponent_sign_flag", exponent_sign_flag),
-                           xtag("number_flag", number_flag));
-                log && log(xtag("tk_type", tk_type));
+                            period_flag = true;
+                        } else if ((*ix == 'e') || (*ix == 'E')) {
+                            if (exponent_flag) {
+                                throw (std::runtime_error
+                                       (tostr("tokenizer::assemble_token",
+                                              ": duplicate exponent marker",
+                                              xtag("pos", ix - tk_start),
+                                              xtag("char", *ix))));
+                            }
+
+                            exponent_flag = true;
+                        } else if (isdigit(*ix)) {
+                            if (exponent_flag) {
+                                /* need digit before exponent to recognize as number */
+                                exponent_digit_flag = true;
+                            } else {
+                                number_flag = true;
+                            }
+                        } else {
+                            /* invalid input */
+                            throw (std::runtime_error
+                                   (tostr("tokenizer::assemble_token",
+                                          ": unexpected character in numeric constant",
+                                          xtag("pos", ix - tk_start),
+                                          xtag("char", *ix))));
+                        }
+                    }
+
+                    if (number_flag) {
+                        if (period_flag || exponent_flag) {
+                            tk_type = tokentype::tk_f64;
+                        } else {
+                            tk_type = tokentype::tk_i64;
+                        }
+                    } else if (period_flag && !exponent_flag) {
+                        tk_type = tokentype::tk_dot;
+                    } else {
+                        /* not a valid token */
+                    }
+
+                    log && log(xtag("sign_flag", sign_flag));
+                    log && log(xtag("period_flag", period_flag),
+                               xtag("exponent_flag", exponent_flag),
+                               xtag("exponent_sign_flag", exponent_sign_flag),
+                               xtag("number_flag", number_flag));
+                    log && log(xtag("tk_type", tk_type));
+                }

                break;
            }
@ -569,7 +641,9 @@ namespace xo {
                || (tk_type == tokentype::tk_f64)
                || (tk_type == tokentype::tk_symbol))
            {
-                /* re-parse in token::i64_value() / token::f64_value() */
+                /* note: capturing token text here;
+                 *       for numeric literals will re-parse in token::i64_value() / token::f64_value()
+                 */
                tk_text = std::string(tk_start, tk_end);
            } else if (tk_type == tokentype::tk_string) {
                ; /* nothing to do here -- desired tk_text already constructed */
@ -603,40 +677,96 @@ namespace xo {
                    tk_text.clear();
            }

-            return token_type(tk_type, std::move(tk_text));
+            return result_type(token_type(tk_type, std::move(tk_text)),
+                               input.prefix(initial_whitespace + initial_token_prefix_from_input));
        } /*assemble_token*/

        template <typename CharT>
        auto
-        tokenizer<CharT>::scan(const span_type & input) -> scan_result
+        tokenizer<CharT>::assemble_final_token(const span_type & token_text) const -> result_type
        {
-            constexpr bool c_debug_flag = true;
-            scope log(XO_DEBUG(c_debug_flag));
+            return assemble_token(0 /*initial_whitespace*/,
+                                  0 /*initial_token_prefix_from_input*/,
+                                  token_text,
+                                  span_type::make_null());
+        }
+
+        template <typename CharT>
+        auto
+        tokenizer<CharT>::scan_completion(const span_type & whitespace,
+                                          const CharT* token_end,
+                                          const span_type & input) -> result_type {
+            /* candidate token text: the part of input after leading whitespace, up to token_end */
+            auto token_span = input.after_prefix(whitespace).prefix_upto(token_end);
+
+            if (this->prefix_.empty()) {
+                return assemble_token(whitespace.size(),
+                                      token_span.size() /*initial_token_prefix_from_input*/,
+                                      token_span,
+                                      input);
+            } else {
+                /* a partial token is stashed in .prefix_, so no complete token to report yet.
+                 * scan() appends to .prefix_ when it reaches end of input without finding
+                 * a definite token boundary.  That covers:
+                 * - input whose parse depends on whether more input follows (eof resolves it)
+                 * - malformed input (that might represent prefix of a valid token.  Say "#incl" in C)
+                 *
+                 * The stashed text is a copy of characters ending at the same position as input,
+                 * so the whole of input is reported as consumed
+                 */
+                return result_type::make_partial(input);
+            }
+
+        }
+
+        template <typename CharT>
+        auto
+        tokenizer<CharT>::scan(const span_type & input) -> result_type
+        {
+            scope log(XO_DEBUG(debug_flag_));

            log && log(xtag("input", input));

            const CharT * ix = input.lo();

-            /* skip whitespace */
-            while (is_whitespace(*ix) && (ix != input.hi()))
-                ++ix;
+            /* skip whitespace + remember beginning of most recent line */
+            while (is_whitespace(*ix) && (ix != input.hi())) {
+
+                if (is_newline(*ix)) {
+                    ++ix;
+                    /* look ahead to {end of line, end of input}, whichever comes first */
+                    const CharT * sol = ix;
+                    const CharT * eol = ix;
+
+                    while ((eol < input.hi()) && (*eol != '\n'))
+                        ++eol;
+
+                    this->current_line_ = span_type(sol, eol);
+                } else {
+                    ++ix;
+                }
+            }

            if(ix == input.hi()) {
                /* no-op */
-                return {
-                    token_type::invalid(),
-                    input.prefix_upto(ix)
-                };
+                return result_type::make_whitespace(input.prefix_upto(ix));
            }

+            // TODO:
+            // 1. hoist complete_flag up here
+            // 2. use in each branch
+            // 3. common check for prefix-capturing after if-cascade below done
+
            /* here: *ix is not whitespace */

            auto whitespace = input.prefix_upto(ix);

            log && log(xtag("whitespace.size", whitespace.size()));

-            /* tk_start points to beginning of token
+            /* tk_start points to known beginning of token
             * (after any whitespace)
+             *
+             * goal is to leave ix pointing to 1 char past the end of the token
             */
            const CharT * tk_start = ix;

@ -654,7 +784,7 @@ namespace xo {
                    /* need more input to know if/when token complete */
                    this->prefix_ += std::string(tk_start, input.hi());

-                    log && log(xtag("captured-prefix", this->prefix_));
+                    log && log(xtag("captured-prefix1", this->prefix_));
                } else {
                    CharT ch2 = *ix;

@ -701,9 +831,49 @@ namespace xo {
                    /* need more input to know if/when token complete */
                    this->prefix_ += std::string(tk_start, input.hi());

-                    log && log(xtag("captured-prefix", this->prefix_));
+                    log && log(xtag("captured-prefix2", this->prefix_));
                }
            } else {
+                /* ix is start of some token */
+
+                if (*ix == '-') {
+                    /* this section load-bearing for input '->' scanning from beginning of token */
+                    ++ix;
+
+                    if (ix == input.hi()) {
+                        /* need more input to know if/when token complete -- see captured-prefix5 below */
+                    } else {
+                        CharT ch2 = *ix;
+
+                        if (ch2 == '>') {
+                            /* include next char and complete token */
+                            ++ix;
+
+                            return scan_completion(whitespace, ix /*token_end*/, input);
+                        }
+
+                        /* here: -123, -.5e-21 for example */
+                    }
+                } else if (*ix == '>') {
+                    /* this section load-bearing for input '>=' scanning from beginning of token.
+                     * Need this because '>' necessarily excluded from is_1char_punctuation()
+                     */
+                    ++ix;
+
+                    if (ix == input.hi()) {
+                        /* need more input to know if/when token complete -- see captured-prefix5 below */
+                    } else {
+                        CharT ch2 = *ix;
+
+                        if (ch2 != '=') {
+                            /* ignore next char and complete token */
+                            return scan_completion(whitespace, ix /*token_end*/, input);
+                        }
+
+                        /* here: >= for example */
+                    }
+                }
+
                /* scan until:
                 * - whitespace
                 * - punctuation
@ -715,59 +885,85 @@ namespace xo {
                    {
                        break;
                    }
+
+                    /* this section load-bearing for input '>' after beginning of a token, e.g. p> */
+                    if ((ix > tk_start) && (*ix == '>'))
+                        break;
+
+                    /* this section load-bearing for input '->' at the end of another token, e.g. p->q */
+                    if (*ix == '-') {
+                        if (ix + 1 == input.hi()) {
+                            /* need more input to know if/when token complete
+                             *
+                             *   apple-banana   parses as: {tk_symbol: apple-banana}
+                             *   apple->        parses as: {tk_symbol: apple} {tk_yields}
+                             *   apple-         illegal (may not end symbol with '-')
+                             */
+                            break;
+                        }
+
+                        if (*(ix + 1) == '>') {
+                            /* treat '->' as punctuation;  complete preceding token */
+                            break;
+                        }
+                    }
                }

                if (ix == input.hi()) {
                    /* need more input to know if/when token complete */
                    this->prefix_ += std::string(tk_start, input.hi());

-                    log && log(xtag("captured-prefix", this->prefix_));
+                    log && log(xtag("captured-prefix5", this->prefix_));
                }
            }

-            auto token_span = input.after_prefix(whitespace).prefix_upto(ix);
-
-            token tk
-                = (this->prefix_.empty()
-                   ? assemble_token(token_span)
-                   : token_type(tokentype::tk_invalid));
-
-            return scan_result
-                { tk, input.prefix(whitespace.size() + token_span.size()) };
+            return scan_completion(whitespace, ix /*token_end*/, input);
        } /*scan*/

        template <typename CharT>
        auto
-        tokenizer<CharT>::scan2(const span_type & input, bool eof) -> scan_result {
+        tokenizer<CharT>::scan2(const span_type & input, bool eof) -> result_type {
+            scope log(XO_DEBUG(debug_flag_));
+
            auto sr = this->scan(input);

-            if (!sr.first.is_valid() && eof) {
-                sr.first = this->notify_eof();
-                /* always consume remainder of input here.
-                 * ambiguous prefix can represent at most one token
-                 */
-                sr.second = input;
-            }
+            if (sr.is_token() || sr.is_error() || !eof)
+                return sr;

-            return sr;
+            /* control here only if input contains no unambiguous tokens.
+             * This implies it contains _at most one_ final token.
+             */
+
+            span_type input2 = input.after_prefix(sr.consumed());
+
+            /* need to include sr.consumed() in retval */
+
+            auto sr2 = this->notify_eof(input2);
+
+            return result_type(sr2.get_token(),
+                               span_type::concat(sr.consumed(), sr2.consumed()),
+                               sr2.error());
        }

        template <typename CharT>
        auto
-        tokenizer<CharT>::notify_eof() -> token_type {
-            constexpr bool c_debug_flag = true;
+        tokenizer<CharT>::notify_eof(const span_type & input) -> result_type {
+            scope log(XO_DEBUG(debug_flag_));

-            scope log(XO_DEBUG(c_debug_flag));
+            log && log(xtag("prefix_", prefix_), xtag("prefix_.size", prefix_.size()), xtag("input", input));

-            token tk
-                = (this->prefix_.empty()
-                   ? token_type(tokentype::tk_invalid)
-                   : assemble_token(span_type(&prefix_[0],
-                                              &prefix_[prefix_.size()])));
+            if (this->prefix_.empty()) {
+                /* almost meretricious to include input here,
+                 * when called from scan2() it can only be whitespace
+                 */
+                return result_type::make_whitespace(input);
+            } else {
+                auto retval = assemble_final_token(span_type::from_string(prefix_));

-            this->prefix_.clear();
+                this->prefix_.clear();

-            return tk;
+                return retval;
+            }
        } /*notify_eof*/
    } /*namespace scm*/
 } /*namespace xo*/
--- a/xo-tokenizer/include/xo/tokenizer/tokentype.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokentype.hpp
@ -15,7 +15,7 @@ namespace xo {
         *
         *  Schematica code examples:
         *
-         *    type point :: { xcoord : f64, ycoord: f64 };
+         *    type point :: { xcoord : f64, ycoord : f64 };
         *    type matrix :: array<double, 2>;  // 2-d array
         *
         *    decl hypot(x : f64, y : f64) -> f64;
@ -39,7 +39,7 @@ namespace xo {
         *    };
         *
         *    def matrixproduct(x : matrix, y : matrix) {
-         *      [i,j : x.row(i) * y.col(j)];
+         *      [i, j : x.row(i) * y.col(j)];
         *    };
         **/
        enum class tokentype {
@ -120,7 +120,7 @@ namespace xo {
            /** operator '/' **/
            tk_slash,

-            /** keyworkd 'type' **/
+            /** keyword 'type' **/
            tk_type,

            /** keyword 'def' **/
--- a/xo-tokenizer/utest/token.test.cpp
+++ b/xo-tokenizer/utest/token.test.cpp
@ -12,70 +12,76 @@ namespace xo {
    using xo::scm::tokentype;

    namespace ut {
-        struct testcase_i64 {
-            std::string text_;
-            bool expect_throw_;
-            std::int64_t expected_;
-        };
+        // also see tokenizer.test.cpp for syntax

-        std::vector<testcase_i64> s_testcase_v = {
-            {"", true, 0},
-            {"0", false, 0},
-            {"-", true, 0},
-            {"+", true, 0},
-            {"-0", false, 0},
-            {"+0", false, 0},
-            {"1", false, 1},
-            {"-1", false, -1},
-            {"9", false, 9},
-            {"-9", false, -9},
-            {"12", false, 12},
-            {"+12", false, 12},
-            {"-12", false, -12},
-            {"99", false, 99},
-            {"-99", false, -99},
-            {"123x", true, 0},
-        };
+        namespace test2 {
+            struct testcase_i64 {
+                std::string text_;
+                bool expect_throw_;
+                std::int64_t expected_;
+            };

-        TEST_CASE("parse-i64", "[token]") {
-            for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) {
-                INFO(xtag("i_tc", i_tc));
+            std::vector<testcase_i64> s_testcase_v = {
+                {"", true, 0},
+                {"0", false, 0},
+                {"-", true, 0},
+                {"+", true, 0},
+                {"-0", false, 0},
+                {"+0", false, 0},
+                {"1", false, 1},
+                {"-1", false, -1},
+                {"9", false, 9},
+                {"-9", false, -9},
+                {"12", false, 12},
+                {"+12", false, 12},
+                {"-12", false, -12},
+                {"99", false, 99},
+                {"-99", false, -99},
+                {"123x", true, 0},
+            };

-                auto const & testcase = s_testcase_v[i_tc];
+            TEST_CASE("parse-i64", "[token]") {
+                for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) {
+                    INFO(xtag("i_tc", i_tc));

-                token tk(tokentype::tk_i64,
-                         testcase.text_);
+                    auto const & testcase = s_testcase_v[i_tc];

-                REQUIRE(tk.tk_type() == tokentype::tk_i64);
+                    token tk(tokentype::tk_i64,
+                             testcase.text_);
+
+                    REQUIRE(tk.tk_type() == tokentype::tk_i64);
+
+                    bool throw_flag = false;
+                    try {
+                        std::int64_t x = tk.i64_value();
+
+                        REQUIRE(x == testcase.expected_);
+                    } catch (std::exception & ex) {
+                        throw_flag = true;
+                    }
+
+                    REQUIRE(throw_flag == testcase.expect_throw_);
+                }
+            }
+        }
+
+        namespace test3 {
+            TEST_CASE("error-i64", "[token]") {
+                token tk(tokentype::tk_i64, "+");

                bool throw_flag = false;
-                try {
-                    std::int64_t x = tk.i64_value();

-                    REQUIRE(x == testcase.expected_);
-                } catch (std::exception & ex) {
+                try {
+                    tk.i64_value();
+                } catch(std::exception & ex) {
                    throw_flag = true;
                }

-                REQUIRE(throw_flag == testcase.expect_throw_);
+                REQUIRE(throw_flag);
            }
        }

-        TEST_CASE("error-i64", "[token]") {
-            token tk(tokentype::tk_i64, "+");
-
-            bool throw_flag = false;
-
-            try {
-                tk.i64_value();
-            } catch(std::exception & ex) {
-                throw_flag = true;
-            }
-
-            REQUIRE(throw_flag);
-        }
-
-        namespace {
+        namespace test4 {
            struct testcase_f64 {
                std::string text_;
                bool expect_throw_;
--- a/xo-tokenizer/utest/tokenizer.test.cpp
+++ b/xo-tokenizer/utest/tokenizer.test.cpp
@ -12,6 +12,79 @@ namespace xo {
    using xo::scm::span;

    namespace ut {
+        /** Two-pass test harness.
+         *
+         *   First pass - evaluate test assertions quietly; a failure is
+         *   recorded in @c ok_flag_ instead of being reported.
+         *   Second pass only if first pass failed.
+         *   On second pass, enable verbose logging and report assertions
+         *   for real.
+         *
+         *   Use:
+         *     rehearser rh;
+         *     for (auto _ : rh) { ... REHEARSE(rh, expr); ... }
+         **/
+        struct rehearser {
+            /* expect at most one iterator to exist per rehearser instance */
+            struct iterator {
+                iterator(rehearser* parent, std::uint32_t attention) : parent_{parent}, attention_{attention} {}
+
+                /** advance to next pass. skips pass 1 when pass 0 succeeded **/
+                iterator& operator++();
+                /** pass number this iterator refers to **/
+                std::uint32_t operator*() const { return attention_; }
+
+                bool operator==(const iterator& ix2) const {
+                    return (parent_ == ix2.parent_) && (attention_ == ix2.attention_);
+                }
+                /* range-for compares with !=; not synthesized from == before c++20 */
+                bool operator!=(const iterator& ix2) const {
+                    return !(*this == ix2);
+                }
+
+                rehearser* parent_ = nullptr;
+                std::uint32_t attention_ = 0;
+
+            };
+
+            bool is_second_pass() const { return attention_ == 1; }
+            bool enable_debug() const { return is_second_pass(); }
+
+            /* (re)start at pass 0; keep rehearser's pass number in step with iterator */
+            iterator begin() { this->attention_ = 0; return iterator(this, 0); }
+            /* one past the last possible pass */
+            iterator end()   { return iterator(this, 2); }
+
+        public:
+            /** pass number: 0 or 1 while iterating; 2 once iteration has finished.
+             *  maintained by iterator::operator++
+             **/
+            std::uint32_t attention_ = 0;
+            /** @brief set to true when test starts; false if first pass fails **/
+            bool ok_flag_ = true;
+        };
+
+        auto rehearser::iterator::operator++() -> iterator&
+        {
+            ++attention_;
+
+            if (parent_->ok_flag_ && attention_ == 1) {
+                /* first pass succeeded -> skip 2nd pass */
+                ++attention_;
+            }
+
+            /* publish pass number to parent. Without this
+             * rehearser::is_second_pass() stays false forever,
+             * so 2nd pass would neither log nor report failed assertions
+             */
+            parent_->attention_ = attention_;
+
+            return *this;
+        }
+
+        /* use this instead of REQUIRE(expr) in context of a rehearser.
+         *
+         *   first pass:  evaluate expr quietly, folding result into rehearser.ok_flag_
+         *   second pass: REQUIRE(expr) for real, so catch2 reports the failure
+         *
+         * expands to a single statement (do/while idiom), so it is safe
+         * as the unbraced body of an if/else; caller supplies trailing ';'
+         */
+#      define REHEARSE(rehearser, expr)                           \
+        do {                                                      \
+            if ((rehearser).is_second_pass()) {                   \
+                REQUIRE((expr));                                  \
+            } else {                                              \
+                REQUIRE(true);                                    \
+                (rehearser).ok_flag_ &= static_cast<bool>(expr);  \
+            }                                                     \
+        } while (0)
+
+        /* note: trivial REQUIRE() call in else branch bc we still want
+         *       catch2 to count assertions when verification succeeds
+         *
+         *       static_cast<bool>: '&=' is bitwise, so a truthy non-bool
+         *       value such as 2 would otherwise clear the flag
+         */
+#    define REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr)          \
+        do {                                                      \
+            if (catch_flag) {                                     \
+                REQUIRE((expr));                                  \
+            } else {                                              \
+                REQUIRE(true);                                    \
+                (ok_flag) &= static_cast<bool>(expr);             \
+            }                                                     \
+        } while (0)
+
+        /* like REQUIRE_ORCAPTURE, but also returns ok_flag from the
+         * enclosing function as soon as it is false
+         */
+#    define REQUIRE_ORFAIL(ok_flag, catch_flag, expr)             \
+        do {                                                      \
+            REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr);         \
+            if (!(ok_flag))                                       \
+                return (ok_flag);                                 \
+        } while (0)
+
        namespace {
            struct testcase_tkz {
                std::string input_;
@ -22,66 +95,73 @@ namespace xo {

            std::vector<testcase_tkz>
            s_testcase_v = {
-                {"<", false, token::leftangle(), true},
-                {">", false, token::rightangle(), true},
+                /*
+                 *
+                 *        expect_throw              consume_all
+                 *        v                         v
+                 */
+                {"<",     false, token::leftangle(), true},
+                /* possible prefix of >= */
+                {">",     false, token::rightangle(), true},
+                {"> ",    false, token::rightangle(), false},

-                {"(", false, token::leftparen(), true},
-                {")", false, token::rightparen(), true},
+                {"(",     false, token::leftparen(), true},
+                {")",     false, token::rightparen(), true},

-                {"[", false, token::leftbracket(), true},
-                {"]", false, token::rightbracket(), true},
+                {"[",     false, token::leftbracket(), true},
+                {"]",     false, token::rightbracket(), true},

-                {"{", false, token::leftbrace(), true},
-                {" {", false, token::leftbrace(), true},
+                {"{",     false, token::leftbrace(), true},
+                {" {",    false, token::leftbrace(), true},

-                {"\t{", false, token::leftbrace(), true},
-                {"\n{", false, token::leftbrace(), true},
-                {"}", false, token::rightbrace(), true},
+                {"\t{",   false, token::leftbrace(), true},
+                {"\n{",   false, token::leftbrace(), true},
+                {"}",     false, token::rightbrace(), true},

-                {"0",  false, token::i64_token("0"), true},
-                {"1",  false, token::i64_token("1"), true},
-                {"12",  false, token::i64_token("12"), true},
-                {"123",  false, token::i64_token("123"), true},
+                {"0",     false, token::i64_token("0"), true},
+                {"1",     false, token::i64_token("1"), true},
+                {"12",    false, token::i64_token("12"), true},
+                {"123",   false, token::i64_token("123"), true},
                {"1234",  false, token::i64_token("1234"), true},

-                {"0 ", false, token::i64_token("0"), false},
-                {"1 ", false, token::i64_token("1"), false},
-                {"12 ", false, token::i64_token("12"), false},
-                {"123 ", false, token::i64_token("123"), false},
+                {"0 ",    false, token::i64_token("0"), false},
+                {"1 ",    false, token::i64_token("1"), false},
+                {"12 ",   false, token::i64_token("12"), false},
+                {"123 ",  false, token::i64_token("123"), false},
                {"1234 ", false, token::i64_token("1234"), false},

-                {"1<", false, token::i64_token("1"), false},
-                {"1>", false, token::i64_token("1"), false},
-                {"1(", false, token::i64_token("1"), false},
-                {"1)", false, token::i64_token("1"), false},
-                {"1[", false, token::i64_token("1"), false},
-                {"1]", false, token::i64_token("1"), false},
-                {"1{", false, token::i64_token("1"), false},
-                {"1}", false, token::i64_token("1"), false},
-                {"1;", false, token::i64_token("1"), false},
-                {"1:", false, token::i64_token("1"), false},
-                {"1,", false, token::i64_token("1"), false},
+                {"1<",    false, token::i64_token("1"), false},
+                {"1>",    false, token::i64_token("1"), false},
+                {"1(",    false, token::i64_token("1"), false},
+                {"1)",    false, token::i64_token("1"), false},
+                {"1[",    false, token::i64_token("1"), false},
+                {"1]",    false, token::i64_token("1"), false},
+                {"1{",    false, token::i64_token("1"), false},
+                {"1}",    false, token::i64_token("1"), false},
+                {"1;",    false, token::i64_token("1"), false},
+                {"1:",    false, token::i64_token("1"), false},
+                {"1,",    false, token::i64_token("1"), false},

-                {".1", false, token::f64_token(".1"), true},
-                {".12", false, token::f64_token(".12"), true},
-                {".123", false, token::f64_token(".123"), true},
+                {".1",    false, token::f64_token(".1"), true},
+                {".12",   false, token::f64_token(".12"), true},
+                {".123",  false, token::f64_token(".123"), true},

-                {"+.1", false, token::f64_token("+.1"), true},
-                {"+.12", false, token::f64_token("+.12"), true},
+                {"+.1",   false, token::f64_token("+.1"), true},
+                {"+.12",  false, token::f64_token("+.12"), true},
                {"+.123", false, token::f64_token("+.123"), true},

-                {"-.1", false, token::f64_token("-.1"), true},
-                {"-.12", false, token::f64_token("-.12"), true},
+                {"-.1",   false, token::f64_token("-.1"), true},
+                {"-.12",  false, token::f64_token("-.12"), true},
                {"-.123", false, token::f64_token("-.123"), true},

-                {"1.", false, token::f64_token("1."), true},
-                {"1.2", false, token::f64_token("1.2"), true},
-                {"1.23", false, token::f64_token("1.23"), true},
+                {"1.",    false, token::f64_token("1."), true},
+                {"1.2",   false, token::f64_token("1.2"), true},
+                {"1.23",  false, token::f64_token("1.23"), true},

-                {"1e0", false, token::f64_token("1e0"), true},
-                {"1e-1", false, token::f64_token("1e-1"), true},
-                {"1e1", false, token::f64_token("1e1"), true},
-                {"1e+1", false, token::f64_token("1e+1"), true},
+                {"1e0",   false, token::f64_token("1e0"), true},
+                {"1e-1",  false, token::f64_token("1e-1"), true},
+                {"1e1",   false, token::f64_token("1e1"), true},
+                {"1e+1",  false, token::f64_token("1e+1"), true},

                {"\"hello\"", false, token::string_token("hello"), true},
                /* tokenizer sees this input:
@ -99,10 +179,20 @@ namespace xo {
                {"\"tab to the right [\\t], to the right [\\t]\"", false,
                 token::string_token("tab to the right [\t], to the right [\t]"), true},

+                {".", false, token::dot(), true},
                {":", false, token::colon(), true},
+                {",", false, token::comma(), true},
+                {"=", false, token::singleassign(), true},
                {":=", false, token::assign_token(), true},
+                {"->", false, token::yields(), true},
+
+                {"+", false, token::plus_token(), true},
+                {"-", false, token::minus_token(), true},
+                {"*", false, token::star_token(), true},
+                {"/", false, token::slash_token(), true},

                {"symbol", false, token::symbol_token("symbol"), true},
+                {"another-symbol", false, token::symbol_token("another-symbol"), true},

                {"type", false, token::type(), true},
                {"def", false, token::def(), true},
@ -112,58 +202,59 @@ namespace xo {
                {"in", false, token::in(), true},
                {"end", false, token::end(), true},

-                {"*", false, token::star_token(), true},
            };
        }

        TEST_CASE("tokenizer", "[tokenizer]") {
            for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) {
+
                const testcase_tkz & testcase = s_testcase_v[i_tc];

-                INFO(xtag("input", testcase.input_));
-                INFO(xtag("i_tc", i_tc));
+                rehearser rh;

-                using tokenizer
-                    = xo::scm::tokenizer<char>;
+                for (auto _ : rh) {
+                    scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer"));

-                tokenizer tkz;
-                tokenizer::span_type
-                    in_span(testcase.input_.c_str(),
-                            testcase.input_.c_str() + testcase.input_.size());
+                    log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));

-                auto out = tkz.scan(in_span);
+                    using tokenizer
+                        = xo::scm::tokenizer<char>;

-                auto tk = out.first;
+                    tokenizer tkz(rh.enable_debug());
+                    tokenizer::span_type
+                        in_span(testcase.input_.c_str(),
+                                testcase.input_.c_str() + testcase.input_.size());

-                if (tk.is_invalid())
-                    tk = tkz.notify_eof();
+                    auto sr = tkz.scan2(in_span, true /*eof*/);

-                REQUIRE(tk.tk_type() == testcase.expected_tk_.tk_type());
-                if (tk.tk_type() == tokentype::tk_i64)
-                {
-                    REQUIRE(!tk.text().empty());
-                    REQUIRE(tk.i64_value() == testcase.expected_tk_.i64_value());
-                } else if (tk.tk_type() == tokentype::tk_f64)
-                {
-                    REQUIRE(!tk.text().empty());
-                    REQUIRE(tk.f64_value() == testcase.expected_tk_.f64_value());
-                } else if(tk.tk_type() == tokentype::tk_string)
-                {
-                    /* tk.text() can be empty, consider input "" */
-                    REQUIRE(tk.text() == testcase.expected_tk_.text());
-                } else if(tk.tk_type() == tokentype::tk_symbol)
-                {
-                    REQUIRE(!tk.text().empty());
-                    REQUIRE(tk.text() == testcase.expected_tk_.text());
-                } else {
-                    REQUIRE(tk.text().empty());
+                    REHEARSE(rh, sr.get_token().tk_type() == testcase.expected_tk_.tk_type());
+                    if (sr.get_token().tk_type() == tokentype::tk_i64)
+                    {
+                        REHEARSE(rh, !sr.get_token().text().empty());
+                        REHEARSE(rh, sr.get_token().i64_value() == testcase.expected_tk_.i64_value());
+                    } else if (sr.get_token().tk_type() == tokentype::tk_f64)
+                    {
+                        REHEARSE(rh, !sr.get_token().text().empty());
+                        REHEARSE(rh, sr.get_token().f64_value() == testcase.expected_tk_.f64_value());
+                    } else if(sr.get_token().tk_type() == tokentype::tk_string)
+                    {
+                        /* sr.get_token().text() can be empty, consider input "" */
+                        REHEARSE(rh, sr.get_token().text() == testcase.expected_tk_.text());
+                    } else if(sr.get_token().tk_type() == tokentype::tk_symbol)
+                    {
+                        REHEARSE(rh, !sr.get_token().text().empty());
+                        REHEARSE(rh, sr.get_token().text() == testcase.expected_tk_.text());
+                    } else {
+                        REHEARSE(rh, sr.get_token().text().empty());
+                    }
+
+                    /* must consume all input for tests we're doing here */
+                    if (testcase.consume_all_) {
+                        REHEARSE(rh, sr.consumed() == in_span);
+                    } else {
+                        REHEARSE(rh, sr.consumed() != in_span);
+                    }
                }
-
-                /* must consume all input for tests we're doing here */
-                if (testcase.consume_all_)
-                    REQUIRE(out.second == in_span);
-                else
-                    REQUIRE(out.second != in_span);
            }
        }

@ -208,56 +299,134 @@ namespace xo {
                  token::symbol_token("y"),
                  token::semicolon(),
                  token::rightbrace()
-                 }}
+                 }},
+                {"a.b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::dot(),
+                  token::symbol_token("b")
+                 }},
+                {"a,b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::comma(),
+                  token::symbol_token("b")
+                 }},
+                {"a:b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::colon(),
+                  token::symbol_token("b")
+                 }},
+                {"a;b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::semicolon(),
+                  token::symbol_token("b")
+                 }},
+                {"a:=b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::assign_token(),
+                  token::symbol_token("b")
+                 }},
+                {"a=b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::singleassign(),
+                  token::symbol_token("b")
+                 }},
+                {"p->q",
+                 false,
+                 {token::symbol_token("p"),
+                  token::yields(),
+                  token::symbol_token("q")
+                 }},
+                {"a + b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::plus_token(),
+                  token::symbol_token("b")
+                 }},
+                {"a - b",
+                 false,
+                 {token::symbol_token("a"),
+                  token::minus_token(),
+                  token::symbol_token("b")
+                 }},
+                {"a-b",
+                 false,
+                 {token::symbol_token("a-b"),
+                 }},
+                {"(apple)",
+                 false,
+                 {token::leftparen(),
+                  token::symbol_token("apple"),
+                  token::rightparen()
+                 }},
+                {"<apple>",
+                 false,
+                 {token::leftangle(),
+                  token::symbol_token("apple"),
+                  token::rightangle()
+                 }},
            };
        }

        TEST_CASE("tokenizer2", "[tokenizer]") {
+            /* this time testing token sequences */
+
+            using tokenizer = xo::scm::tokenizer<char>;
+
            for (std::size_t i_tc = 0, n_tc = s_testcase2_v.size(); i_tc < n_tc; ++i_tc) {
                const testcase2_tkz & testcase = s_testcase2_v[i_tc];

-                INFO(xtag("input", testcase.input_));
-                INFO(xtag("i_tc", i_tc));
+                rehearser rh;

-                using tokenizer
-                    = xo::scm::tokenizer<char>;
+                for (auto _ : rh) {
+                    scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer2"));

-                tokenizer tkz;
-                tokenizer::span_type
-                    in_span(testcase.input_.c_str(),
-                            testcase.input_.c_str() + testcase.input_.size());
+                    log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));

-                for (int i_tk = 0, n_tk = testcase.expected_tk_v_.size();
-                     i_tk < n_tk; ++i_tk)
-                {
-                    INFO(xtag("i_tk", i_tk));
+                    tokenizer tkz(rh.enable_debug());

-                    auto res = tkz.scan2(in_span, in_span.empty());
-                    const auto & tk = res.first;
+                    tokenizer::span_type
+                        in_span(testcase.input_.c_str(),
+                                testcase.input_.c_str() + testcase.input_.size());

-                    if (tk.is_valid())
-                        REQUIRE(tk.tk_type() == testcase.expected_tk_v_[i_tk].tk_type());
-                    if (tk.tk_type() == tokentype::tk_i64)
+                    for (int i_tk = 0, n_tk = testcase.expected_tk_v_.size();
+                         i_tk < n_tk; ++i_tk)
                    {
-                        REQUIRE(!tk.text().empty());
-                        REQUIRE(tk.i64_value() == testcase.expected_tk_v_[i_tk].i64_value());
-                    } else if (tk.tk_type() == tokentype::tk_f64)
-                    {
-                        REQUIRE(!tk.text().empty());
-                        REQUIRE(tk.f64_value() == testcase.expected_tk_v_[i_tk].f64_value());
-                    } else if(tk.tk_type() == tokentype::tk_string)
-                    {
-                        /* tk.text() can be empty, consider input "" */
-                        REQUIRE(tk.text() == testcase.expected_tk_v_[i_tk].text());
-                    } else if(tk.tk_type() == tokentype::tk_symbol)
-                    {
-                        REQUIRE(!tk.text().empty());
-                        REQUIRE(tk.text() == testcase.expected_tk_v_[i_tk].text());
-                    } else {
-                        REQUIRE(tk.text().empty());
+                        log && log(xtag("i_tk", i_tk));
+
+                        auto sr = tkz.scan2(in_span, in_span.empty());
+                        const auto & tk = sr.get_token();
+
+                        if (tk.is_valid()) {
+                            REHEARSE(rh, tk.tk_type() == testcase.expected_tk_v_[i_tk].tk_type());
+                        }
+                        if (tk.tk_type() == tokentype::tk_i64)
+                        {
+                            REHEARSE(rh, !tk.text().empty());
+                            REHEARSE(rh, tk.i64_value() == testcase.expected_tk_v_[i_tk].i64_value());
+                        } else if (tk.tk_type() == tokentype::tk_f64)
+                        {
+                            REHEARSE(rh, !tk.text().empty());
+                            REHEARSE(rh, tk.f64_value() == testcase.expected_tk_v_[i_tk].f64_value());
+                        } else if(tk.tk_type() == tokentype::tk_string)
+                        {
+                            /* tk.text() can be empty, consider input "" */
+                            REHEARSE(rh, tk.text() == testcase.expected_tk_v_[i_tk].text());
+                        } else if(tk.tk_type() == tokentype::tk_symbol)
+                        {
+                            REHEARSE(rh, !tk.text().empty());
+                            REHEARSE(rh, tk.text() == testcase.expected_tk_v_[i_tk].text());
+                        } else {
+                            REHEARSE(rh, tk.text().empty());
+                        }
+
+                        in_span = in_span.after_prefix(sr.consumed());
                    }
-
-                    in_span = in_span.after_prefix(res.second);
                }
            }
        } /*TEST_CASE(tokenizer2)*/