From 84c5a75b289e641e9589fc4939681199e8007453 Mon Sep 17 00:00:00 2001
From: Roland Conybeare <rconybeare@gmail.com>
Date: Sat, 22 Nov 2025 20:13:33 -0500
Subject: [PATCH] xo-tokenizer: refactor to correct accounting for
 line/consume/errpos

---
 xo-alloc/include/xo/alloc/ArenaAlloc.hpp      |  86 ++++++-
 xo-alloc/src/alloc/ArenaAlloc.cpp             | 107 ++++++--
 xo-reader/src/reader/reader.cpp               |  24 +-
 xo-tokenizer/example/tokenrepl/tokenrepl.cpp  |  29 +--
 .../include/xo/tokenizer/input_state.hpp      | 209 ++++++++++++---
 .../include/xo/tokenizer/scan_result.hpp      |  29 ++-
 .../include/xo/tokenizer/tokenizer.hpp        | 242 ++++++++----------
 .../include/xo/tokenizer/tokenizer_error.hpp  |  10 +-
 xo-tokenizer/utest/tokenizer.test.cpp         |   8 +-
 9 files changed, 501 insertions(+), 243 deletions(-)

diff --git a/xo-alloc/include/xo/alloc/ArenaAlloc.hpp b/xo-alloc/include/xo/alloc/ArenaAlloc.hpp
index da67f8f2..e0bfed2f 100644
--- a/xo-alloc/include/xo/alloc/ArenaAlloc.hpp
+++ b/xo-alloc/include/xo/alloc/ArenaAlloc.hpp
@@ -18,11 +18,11 @@ namespace xo {
          *    allocation order:
          *    ----------------------->
          *
-         *    <----------------- .size() ------------------>
-         *    <----------------- .committed() --------------->
+         *    <----------------- .size(), .reserved() --------------------------->
+         *    <----------------- .committed() ------------->
          *
-         *    <-------allocated------><--------free-------->  <---uncommitted---->
-         *    XXXXXXXXXXXXXXXXXXXXXXXX______________________  ....................
+         *    <-------allocated------><--------free--------><-----uncommitted---->
+         *    XXXXXXXXXXXXXXXXXXXXXXXX______________________......................
          *    ^       ^               ^                     ^                     ^
          *    lo      checkpoint   free                 limit                    hi
          *
@@ -31,12 +31,77 @@ namespace xo {
          *   >        < .before_checkpoint()
          *           >                < .after_checkpoint()
          *
+         *   lifetime:
+         *
+         *   1. initial state after ctor
+         *
+         *   >< committed()=0
+         *    <---------------------------uncommitted---------------------------->
+         *    ....................................................................
+         *    ^                                                                   ^
+         *    lo                                                                 hi
+         *    checkpoint
+         *    free
+         *    limit
+         *
+         *    1a. one call to ::mmap()
+         *    1b. vm address space [lo,hi) is reserved
+         *    1c. address space [lo,hi) is inaccessible. no read|write|execute permission
+         *
+         *   2. after first allocation of n bytes
+         *
+         *    <--committed--->
+         *          <--free--><--------------------uncommitted-------------------->
+         *   >      <- allocated
+         *    XXXXXX__________.....................................................
+         *    ^     ^         ^                                                    ^
+         *    lo    lo+n  limit                                                   hi
+         *    ^     free
+         *    checkpoint
+         *
+         *    2a. committed just enough hugepages (2mb each) to accommodate n,
+         *        i.e. expand-on-demand:
+         *        - one call to ::mprotect()
+         *        - .limit = .lo + (k+1) * .hugepage_z for some integer k>=0
+         *        - k * .hugepage_z < n <= (k+1) * .hugepage_z
+         *    2b. expect immediate cost 1-5us, includes:
+         *        - TLB flush
+         *          invalidate TLB entries for committed range on all cores that this
+         *          process' threads have run on since process inception.
+         *          Also, if a kernel thread has run on one of said cores, it may
+         *          have borrowed our TLB entries
+         *        - page table update
+         *          write to entry for each vm page
+         *        - kernel overhead 100-1000 cycles (< 1us)
+         *    2c. expect deferred cost 1us-2us per hugepage:
+         *        - committed pages aren't backed by physical memory until
+         *          first touched; minor page fault on first access for each page.
+         *        - so about 256-512us for 1MB
+         *   3. after .expand(z)
+         *
+         *    <-------------committed------------>
+         *          <------------free------------><----------uncommitted---------->
+         *   >      <- allocated
+         *    XXXXXX______________________________.................................
+         *    ^     ^                             ^                                ^
+         *    lo    lo+n                      limit                               hi
+         *    ^     free
+         *    checkpoint
+         *
+         *    3a. same as case 2. but without advancing .free pointer.
+         *
+         *   4. after dtor
+         *
+         *    4a. all memory returned to o/s, no longer reserved.
+         *        - one call to ::munmap()
+         *
          *  @endtext
          *
          *  Design Notes:
          *  - non-copyable, non-moveable
-         *  - always heap-allocated
          *  - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_
+         *  - memory for the ArenaAlloc object itself (not the memory it allocates), ~100 bytes,
+         *    is always heap-allocated.  Use ArenaAlloc::make()
          *  - memory obtained from mmap(), not heap
          *  - memory addresses are stable. Expand storage by committing VM pages.
          *  - @ref lo_ is aligned on VM page size (guaranteed by mmap())
@@ -55,7 +120,7 @@ namespace xo {
 
             /** Create allocator with capacity @p z,
              *  Reserve memory addresses for @p z bytes,
-             *  but don't commit them until needed
+             *  (but don't commit them until needed)
              **/
             static up<ArenaAlloc> make(const std::string & name,
                                        std::size_t z,
@@ -127,7 +192,12 @@ namespace xo {
             std::string name_;
 
             /** size of a VM page (from getpagesize()) **/
-            std::size_t page_z_;
+            std::size_t page_z_ = 0;
+
+            /** size of a huge VM page. hardwiring this in ctor (to 2MB).
+             *  larger pages relieve pressure on TLB, but suboptimal if use << 2MB
+             **/
+            std::size_t hugepage_z_ = 0;
 
             /** allocator owns memory in range [@ref lo_, @ref hi_) **/
             std::byte * lo_ = nullptr;
@@ -139,7 +209,7 @@ namespace xo {
              *  older (addresses below checkpoint)
              *  and younger (addresses above checkpoint)
              **/
-            std::byte * checkpoint_;
+            std::byte * checkpoint_ = nullptr;
             /** free pointer. memory in range [@ref free_, @ref limit_) available **/
             std::byte * free_ptr_ = nullptr;
             /** soft limit: end of committed virtual memory **/
diff --git a/xo-alloc/src/alloc/ArenaAlloc.cpp b/xo-alloc/src/alloc/ArenaAlloc.cpp
index febbcb61..0a0365e2 100644
--- a/xo-alloc/src/alloc/ArenaAlloc.cpp
+++ b/xo-alloc/src/alloc/ArenaAlloc.cpp
@@ -13,37 +13,101 @@
 #include <cassert>
 
 namespace xo {
+    using std::byte;
+
     namespace gc {
+        namespace {
+            /* alignment better be a power of 2 */
+            std::size_t
+            align_lub(std::size_t x, std::size_t align)
+            {
+                /* e.g:
+                 *   align = 4096, x%align = 100 -> dx = 3996
+                 *   align = 4096, x%align = 0   -> dx = 0
+                 */
+                std::size_t dx = (align - (x % align)) % align;
+
+                return x + dx;
+            }
+        }
+
         ArenaAlloc::ArenaAlloc(const std::string & name,
-                               std::size_t z, bool debug_flag)
+                               std::size_t z,
+                               bool debug_flag)
         {
             scope log(XO_DEBUG(debug_flag), xtag("name", name));
 
+            constexpr size_t c_hugepage_z = 2 * 1024 * 1024;
+
             this->name_       = name;
             this->page_z_     = getpagesize();
+            this->hugepage_z_ = c_hugepage_z;
 
-            // reserve virtual memory
+            // 1. need k pagetable entries where k is min {k | k * .page_z >= z}
+            // 2. base will be aligned with .page_z but likely not with .hugepage_z
+            // 3. bad to have misalignment, because misaligned {prefix, suffix} of [base, base+z)
+            //    will use 4k pages instead of 2mb pages
+            //
+            // strategy:
+            // 4. round up z to multiple of c_hugepage_z
+            // 5. over-request so reserved range contains an aligned subrange of size z
+            // 6. unmap misaligned prefix
+            // 7. unmap misaligned suffix.
+            // 8. enable huge pages for now-aligned remainder of reserved range
+            //
+            // Z. note: rejecting inferior MAP_HUGETLB|MAP_HUGE_2MB flags on ::mmap here:
+            //    Za. requires previously-reserved memory in /proc/sys/vm/nr_hugepages
+            //    Zb. reserved pages permanently resident in RAM, never swapped
+            //    Zc. memory cost incurred even if no application is using said pages
 
-            void * base = mmap(nullptr, z, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            z = align_lub(z, c_hugepage_z); // 4.
+
+            // 5.
+            byte * base = reinterpret_cast<byte *>(::mmap(nullptr,
+                                                               z + c_hugepage_z,
+                                                               PROT_NONE,
+                                                               MAP_PRIVATE | MAP_ANONYMOUS,
+                                                               -1, 0));
 
             log && log("acquired memory [lo,hi) using mmap",
                        xtag("lo", base),
                        xtag("z", z),
-                       xtag("hi", reinterpret_cast<std::byte *>(base) + z));
-
-            // could use this as fallback..
-            //base         = (new std::byte [z]);
+                       xtag("hi", reinterpret_cast<byte *>(base) + z));
 
             if (base == MAP_FAILED) {
                 throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed",
                                                xtag("size", z)));
             }
 
-            this->lo_          = reinterpret_cast<std::byte *>(base);
+            byte * aligned_base = reinterpret_cast<byte *>(align_lub(reinterpret_cast<size_t>(base),
+                                                                     c_hugepage_z));
+
+            assert(reinterpret_cast<size_t>(aligned_base) % c_hugepage_z == 0);
+            assert(aligned_base >= base);
+            assert(aligned_base < base + c_hugepage_z);
+
+            if (base < aligned_base) {
+                size_t prefix = aligned_base - base;
+
+                ::munmap(base, prefix); // 6.
+            }
+
+            byte * aligned_hi = aligned_base + z;
+            byte * hi = base + z + c_hugepage_z;
+
+            if (aligned_hi < hi) {
+                size_t suffix = hi - aligned_hi;
+
+                ::munmap(aligned_hi, suffix); // 7.
+            }
+
+            ::madvise(aligned_base, z, MADV_HUGEPAGE); // 8.
+
+            this->lo_          = aligned_base;
             this->committed_z_ = 0;
             this->checkpoint_  = lo_;
             this->free_ptr_    = lo_;
-            this->limit_       = lo_ + z;
+            this->limit_       = lo_;
             this->hi_          = lo_ + z;
             this->debug_flag_  = debug_flag;
 
@@ -52,7 +116,9 @@ namespace xo {
                                                xtag("size", z)));
             }
 
-            log && log(xtag("lo", (void*)lo_), xtag("page_z", page_z_));
+            log && log(xtag("lo", (void*)lo_),
+                       xtag("page_z", page_z_),
+                       xtag("hugepage_z", hugepage_z_));
         }
 
         ArenaAlloc::~ArenaAlloc()
@@ -64,7 +130,7 @@ namespace xo {
             if (lo_) {
                 log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_));
 
-                munmap(lo_, hi_ - lo_);
+                ::munmap(lo_, hi_ - lo_);
             }
             // could use this as fallback if we dropped the uncommitted technique
             //delete [] this->lo_;
@@ -86,21 +152,6 @@ namespace xo {
                                                  z, debug_flag));
         }
 
-        namespace {
-            /* alignment better be a power of 2 */
-            std::size_t
-            align_lub(std::size_t x, std::size_t align)
-            {
-                /* e.g:
-                 *   align = 4096, x%align = 100 -> dx = 3996
-                 *   align = 4096, x%align = 0   -> dx = 0
-                 */
-                std::size_t dx = (align - (x % align)) % align;
-
-                return x + dx;
-            }
-        }
-
         bool
         ArenaAlloc::expand(size_t offset_z)
         {
@@ -118,7 +169,7 @@ namespace xo {
                                                xtag("requested", offset_z), xtag("reserved", reserved())));
             }
 
-            std::size_t aligned_offset_z = align_lub(offset_z, page_z_);
+            std::size_t aligned_offset_z = align_lub(offset_z, hugepage_z_);
             std::byte * commit_start = lo_ + committed_z_;
             std::size_t add_commit_z = aligned_offset_z - committed_z_;
 
@@ -130,7 +181,7 @@ namespace xo {
                        xtag("add_commit_z", add_commit_z),
                        xtag("commit_end", commit_start + add_commit_z));
 
-            if (mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
+            if (::mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
                 throw std::runtime_error(tostr("ArenaAlloc::expand: commit failure",
                                                xtag("committed_z", committed_z_),
                                                xtag("add_commit_z", add_commit_z)));
diff --git a/xo-reader/src/reader/reader.cpp b/xo-reader/src/reader/reader.cpp
index 6931cd5f..2ea9b4fd 100644
--- a/xo-reader/src/reader/reader.cpp
+++ b/xo-reader/src/reader/reader.cpp
@@ -5,7 +5,8 @@
 namespace xo {
     namespace scm {
         reader::reader(bool debug_flag) :
-            tokenizer_{debug_flag}, parser_{debug_flag}
+            tokenizer_{debug_flag},
+            parser_{debug_flag}
         {}
 
         void
@@ -29,7 +30,7 @@ namespace xo {
         }
 
         reader_result
-        reader::read_expr(const span_type & input_arg, bool eof)
+        reader::read_expr(const span_type & input_arg, bool eof_flag)
         {
             scope log(XO_DEBUG(this->debug_flag()));
 
@@ -38,20 +39,25 @@ namespace xo {
             /* input text-span consumed by this call.
              * Always comprises some number (possibly 0)
              * of complete tokens,  along with any leading
-             * whitespace
+             * whitespace.
+             *
+             * expr_span may also begin and end part way through
+             * distinct input lines
              */
             span_type expr_span = input.prefix(0ul);
 
             while (!input.empty()) {
-                /* each loop iterations reads one token */
+                /* each loop iteration reads one token */
 
-                /* read one token from input */
-                auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof);
+                /* read one token from input.
+                 * tokenizer stashes one line at a time, but used_span reports
+                 * only the portion representing the first token.
+                 */
+                auto [tk, used_span, error1] = this->tokenizer_.scan(input, eof_flag);
 
                 log && log(xtag("consumed", used_span));
                 log && log(xtag("input.pre", input));
 
-                input = this->tokenizer_.consume(used_span, input);
                 expr_span += used_span;
 
                 if (tk.is_valid()) {
@@ -76,7 +82,7 @@ namespace xo {
                                              expr_span, parser_.stack_size(), reader_error());
                     } else if (parser_result.is_error()) {
                         /* 1. parser detected error.
-                         * 2. tokenizer_.input_state() refers to position just after offending token
+                         * 2. tokenizer_.input_state().current_pos refers to position just after offending token
                          * 3. error_pos here is 0 because error detected at token boundary
                          */
                         reader_error error2(parser_result.error_src_function(),
@@ -122,7 +128,7 @@ namespace xo {
              * 1. input.empty (perhaps ate some whitespace,  ok)
              * 2. missing or incomplete token (ok unless eof)
              */
-            if (eof) {
+            if (eof_flag) {
                 if (parser_.has_incomplete_expr()) {
                     throw std::runtime_error
                         ("reader::read_expr"
diff --git a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp
index d6eacfea..bc73de3f 100644
--- a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp
+++ b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp
@@ -29,6 +29,10 @@ main() {
     tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info);
     string input_str;
 
+    size_t line_no = 1;
+
+    constexpr std::size_t c_maxlines = 25;
+
     while (repl_getline(interactive, cin, cout, input_str)) {
         // we want tokenizer to see newline, it's syntax
         input_str.push_back('\n');
@@ -36,7 +40,7 @@ main() {
 
         // reminder: input may contain multiple tokens
         while (!input.empty()) {
-            auto [tk, consumed, error] = tkz.scan(input);
+            auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
 
             if (tk.is_valid()) {
                 cout << tk << endl;
@@ -47,29 +51,16 @@ main() {
                 break;
             }
 
-            input = tkz.consume(consumed, input);
+            input = input.after_prefix(consumed);
         }
 
         /* here: input.empty() or error encountered */
 
-        /* discard stashed remainder of input line
-         * (for nicely-formatted errors)
-         */
-        tkz.discard_current_line();
-    }
+        ++line_no;
 
-    {
-        span_type input = span_type::from_string(input_str);
-
-        auto [tk, consumed, error] = tkz.notify_eof(input);
-
-        input = tkz.consume(consumed, input);
-
-        if (tk.is_valid()) {
-            cout << tk << endl;
-        } else if (error.is_error()) {
-            cout << "parsing error: " << endl;
-            error.report(cout);
+        if (line_no > c_maxlines) {
+            cout << "always exit after " << c_maxlines << " lines of input" << endl;
+            break;
         }
     }
 }
diff --git a/xo-tokenizer/include/xo/tokenizer/input_state.hpp b/xo-tokenizer/include/xo/tokenizer/input_state.hpp
index 0e93512d..0cea1155 100644
--- a/xo-tokenizer/include/xo/tokenizer/input_state.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/input_state.hpp
@@ -9,9 +9,50 @@
 
 namespace xo {
     namespace scm {
+        /** enum to report outcome of @ref capture_current_line **/
+        enum class input_error {
+            /** normal return, input line successfully identified and captured **/
+            ok = 0,
+            /** incomplete input; should not have been submitted to @ref capture_current_line.
+             *  note: submit last line of input with eof_flag=true
+             **/
+            incomplete,
+            N
+        };
+
         /** @class input_state
          *  @brief Track detailed input position for use in error messages
          *
+         *  input characters fall into two categories:
+         *  - consumed: memory can be reclaimed/recycled
+         *  - buffered: memory will be retained unaltered until consumed
+         *
+         *  remarks:
+         *  - always in one of two states:
+         *    - empty
+         *    - contains exactly one line of input
+         *  - also record current input position.
+         *    Use this for example to identify where tokenizer rejected input.
+         *  - .current_pos advances by one token
+         *
+         *  - buffered characters always form a single contiguous range.
+         *  - input_state does not own any storage; storage is owned elsewhere
+         *
+         *  @text
+         *
+         *    <------------------.current_line------------------>
+         *                                   >  <-- .whitespace
+         *    cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx
+         *    ^                                 ^                ^
+         *    .current_line.lo                  |                .current_line.hi
+         *                           .current_pos
+         *
+         *    <----prev_line----> <----current_line---->
+         *                                   >  <--whitespace
+         *    ppppppppppppppppppp cccccccccccc__TTTTTTTT
+         *    ^
+         *
+         *  @endtext
          **/
         template <typename CharT>
         class input_state {
@@ -33,8 +74,11 @@ namespace xo {
             /** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
              *  Introduced for unit tests, not used in tokenizer.
              **/
-            explicit input_state(const span<const CharT>& current_line, size_t current_pos, size_t whitespace)
-                : current_line_{current_line}, current_pos_{current_pos}, whitespace_{whitespace} {}
+            explicit input_state(const span<const CharT>& current_line,
+                                 size_t current_pos,
+                                 size_t whitespace) : current_line_{current_line},
+                                                      current_pos_{current_pos},
+                                                      whitespace_{whitespace} {}
 
             ///@}
 
@@ -63,6 +107,7 @@ namespace xo {
 #endif
             const span_type & current_line() const { return current_line_; }
 #pragma GCC diagnostic pop
+            size_t tk_start() const { return tk_start_; }
             size_t current_pos() const { return current_pos_; }
             size_t whitespace() const { return whitespace_; }
             bool debug_flag() const { return debug_flag_; }
@@ -77,27 +122,65 @@ namespace xo {
              **/
             input_state rewind(std::size_t n) const;
 
-            /** Capture prefix of @p input up to first newline **/
-            void capture_current_line(const span_type & input);
+            /** Capture prefix of @p input up to first newline.
+             *  Set read position to start of line.
+             *
+             *  Alters:
+             *    .current_line
+             *    .current_pos
+             *
+             * Return pair comprising error code and input span representing first line
+             * (including trailing newline) from @p input.
+             **/
+            std::pair<input_error, span_type> capture_current_line(const span_type & input,
+                                                                   bool eof_flag);
+
+            /** atomically return current line while discarding it from input state
+             *
+             *  Alters
+             *    .current_line
+             *    .current_pos
+             *    .whitespace
+             **/
+            span_type consume_current_line();
 
             /** Reset input state for start of next line.
              *  Expression parser may use this to discard remainder of input line
              *  after a parsing error.
+             *
+             * Alters:
+             *   .current_line
+             *   .current_pos
+             *   .whitespace
              **/
             void discard_current_line();
 
-            /** Add @p z to current position **/
-            void consume(size_t z);
-
-            /** Skip prefix of input comprising whitespace.
-             *  Return pointer to first non-whitespace character in @p input,
-             *  or @c input.hi if input contains only whitespace.
-             *
-             *  if @p input contains any newlines, preserves suffix after last
-             *  such newilne in @p current_line_
+            /** Advance input position by @p z
              *
+             *  Alters:
+             *   .current_pos
              **/
-            const CharT * skip_leading_whitespace(const span_type & input);
+            void advance(size_t z);
+
+            /** Advance .current_pos to pos.
+             *  Require: pos in @ref current_line_
+             **/
+            void advance_until(const CharT * pos);
+
+            /** Skip prefix of input, starting at current read position,
+             *  comprising only whitespace.
+             *
+             *  Presume input position is at end of token;
+             *  on return @ref whitespace_ counts number of whitespace characters
+             *  skipped.
+             *
+             *  Return pointer to first non-whitespace character after @ref current_pos_
+             *  or @ref current_line_.hi if reached end of buffered line.
+             *
+             *  Alters:
+             *    .whitespace
+             **/
+            const CharT * skip_leading_whitespace();
 
             ///@}
 
@@ -107,7 +190,9 @@ namespace xo {
 
             /** remember current input line.  Used only to report errors **/
             span<const CharT> current_line_ = span<const CharT>();
-            /** current input position within @ref current_line_ **/
+            /** start of last token within @ref current_line_ **/
+            size_t tk_start_ = 0;
+            /** input position within @ref current_line_ **/
             size_t current_pos_ = 0;
             /** number of whitespace chars since end of preceding token,
              *  or last newline, whichever is less
@@ -149,7 +234,7 @@ namespace xo {
 
         template <typename CharT>
         void
-        input_state<CharT>::consume(size_t z) {
+        input_state<CharT>::advance(size_t z) {
             scope log(XO_DEBUG(debug_flag_));
 
             this->current_pos_ += z;
@@ -157,6 +242,28 @@ namespace xo {
             log && log(xtag("z", z), xtag("current_pos", current_pos_));
         }
 
+        template <typename CharT>
+        void
+        input_state<CharT>::advance_until(const CharT * pos) {
+            scope log(XO_DEBUG(debug_flag_));
+
+            assert(current_line_.lo() <= pos && pos < current_line_.hi());
+
+            this->current_pos_ = pos - current_line_.lo();
+
+            log && log(xtag("current_pos", current_pos_));
+        }
+
+        template <typename CharT>
+        auto
+        input_state<CharT>::consume_current_line() -> span_type {
+            span_type retval = current_line_;
+
+            this->discard_current_line();
+
+            return retval;
+        }
+
         template <typename CharT>
         void
         input_state<CharT>::discard_current_line() {
@@ -166,10 +273,14 @@ namespace xo {
         }
 
         template <typename CharT>
-        void
-        input_state<CharT>::capture_current_line(const span_type & input)
+        auto
+        input_state<CharT>::capture_current_line(const span_type & input,
+                                                 bool eof_flag) -> std::pair<input_error, span_type>
         {
             // see also discard_current_line()
+            // note: must capture entirety of first line,
+            //       for example including leading whitespace.
+            //       See discussion in tokenizer scan() method
 
             scope log(XO_DEBUG(debug_flag_));
 
@@ -177,44 +288,76 @@ namespace xo {
             const CharT * sol = input.lo();
             const CharT * eol = sol;
 
+            if (sol == current_line_.lo()) {
+                log && log("short-circuit - current line already stashed");
+
+                /* nothing to do here */
+                return std::make_pair(input_error::ok, current_line_);
+            }
+
             while ((eol < input.hi()) && (*eol != '\n'))
                 ++eol;
 
+            if (*eol == '\n') {
+                /* include \n at end-of-line */
+                ++eol;
+            } else {
+                if (!eof_flag) {
+                    /* caller expected to provide complete line of input. complain and ignore */
+                    return std::make_pair(input_error::incomplete,
+                                          input.prefix(0ul));
+                }
+            }
+
             this->current_line_ = span_type(sol, eol);
             this->current_pos_ = 0;
+            this->whitespace_ = 0;
 
             log && log(xtag("current_line", print::printspan(current_line_)),
                        xtag("current_pos", current_pos_));
+
+            return std::make_pair(input_error::ok,
+                                  span_type(sol, eol));
         }
 
         template <typename CharT>
         const CharT *
-        input_state<CharT>::skip_leading_whitespace(const span_type & input)
+        input_state<CharT>::skip_leading_whitespace()
         {
             scope log(XO_DEBUG(debug_flag_));
 
-            const CharT * ix = input.lo();
-
-            if (this->current_line().is_null()) {
-                this->capture_current_line(input);
-            }
+            const CharT * ix = current_line_.lo() + current_pos_;
 
             this->whitespace_ = 0;
 
             /* skip whitespace + remember beginning of most recent line */
-            while (is_whitespace(*ix) && (ix != input.hi())) {
-                if (is_newline(*ix)) {
-                    ++ix;
+            while (is_whitespace(*ix) && (ix != current_line_.hi())) {
+                ++ix;
 
-                    this->capture_current_line(span_type(ix, input.hi()));
-                } else {
-                    ++ix;
-
-                    ++(this->whitespace_);
-                }
+                ++(this->whitespace_);
             }
 
+            this->tk_start_ = ix - current_line_.lo();
+            this->current_pos_ = ix - current_line_.lo();
+
             return ix;
         }
+
+        template <typename CharT>
+        inline std::ostream &
+        operator<<(std::ostream & os,
+                   const input_state<CharT>& x)
+        {
+            using xo::print::unq;
+
+            os << "<input_state"
+            << xtag("tk", x.tk_start())
+            << xtag("pos", x.current_pos())
+            << xtag("line", unq(std::string_view(x.current_line().lo(), x.current_line().hi())))
+            << xtag("whitespace", x.whitespace())
+            << ">";
+
+            return os;
+        }
     }
 }
diff --git a/xo-tokenizer/include/xo/tokenizer/scan_result.hpp b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
index fbc29105..79846d3c 100644
--- a/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp
@@ -7,6 +7,7 @@
 
 #include "token.hpp"
 #include "tokenizer_error.hpp"
+#include "input_state.hpp"
 
 namespace xo {
     namespace scm {
@@ -17,11 +18,11 @@ namespace xo {
          *  Possible outcomes fall into several categories
          *  (with T: @c token_.is_valid(), E: @cerror_.is_error())
          *
-         *  | T     | E     | description       |
-         *  |-------+-------+-------------------|
-         *  | false | false | end of input      |
-         *  | true  | false | parsed token in T |
-         *  | false | true  | parse error in E  |
+         *  | T     | E     | description                         |
+         *  |-------+-------+-------------------------------------|
+         *  | false | false | end of input, including end of line |
+         *  | true  | false | parsed token in T                   |
+         *  | false | true  | parse error in E                    |
          *
          * @endcode
          **/
@@ -31,6 +32,7 @@ namespace xo {
             using token_type = token<CharT>;
             using span_type = span<const CharT>;
             using error_type = tokenizer_error<CharT>;
+            using input_state_type = input_state<CharT>;
 
         public:
             scan_result(const token_type & token,
@@ -40,7 +42,8 @@ namespace xo {
 
             static scan_result make_whitespace(const span_type & prefix_input);
             static scan_result make_partial(const span_type & prefix_input);
-            static scan_result make_error(const error_type & error);
+            static scan_result make_error(const error_type & error,
+                                          input_state_type & input_state_ref);
 
             bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
             bool is_token() const { return token_.is_valid(); }
@@ -51,7 +54,10 @@ namespace xo {
             const error_type & error() const { return error_; }
 
         public:
-            /** successfully parsed token, whenever tk_type != tokentype::tk_invalid **/
+            /** Successfully parsed token, whenever tk_type != tokentype::tk_invalid.
+             *  Will be tokentype::tk_invalid in normal course of events for valid input,
+             *  when consuming whitespace
+             **/
             token_type token_;
             /** input span represented by .token, on success. Otherwise not defined **/
             span_type consumed_;
@@ -72,9 +78,14 @@ namespace xo {
         }
 
         template <typename CharT>
-        auto scan_result<CharT>::make_error(const error_type & error) -> scan_result
+        auto scan_result<CharT>::make_error(const error_type & error,
+                                            input_state_type & input_state_ref) -> scan_result
         {
-            return scan_result(token_type::invalid(), span_type::make_null(), error);
+            /* on error the whole current input line is given up:
+             * consume it from input state and report it as consumed
+             */
+            const span_type consumed_line = input_state_ref.consume_current_line();
+            return scan_result(token_type::invalid(), consumed_line, error);
         }
 
     } /*namespace scm*/
diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
index 0dd46877..2ea695dc 100644
--- a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp
@@ -99,22 +99,15 @@ namespace xo {
             static bool is_2char_punctuation(CharT ch);
 
             /** assemble token from text @p token_text.
-             *  @p token_text will often (but not always) represent a subset of @p input.
-             *  (For example consider multi-line string literals)
-             *  Also the span @p token_text may (in uncommon cases)
-             *  have been copied to separate storage from @p input
-             *
              *  @p initial_whitespace   Amount of whitespace input being consumed from input.
-             *  @p initial_token_prefix_from_input  Amount of non-whitespace input being
-             *  consumed from input. Not counting any stashed-and-already-consumed input
+             *  @p token_text subset of input_line representing a single token.
+             *  @p input_state input state containing input_line
              *
              *  retval.consumed will represent some possibly-empty prefix of @p input
              **/
             static result_type assemble_token(std::size_t initial_whitespace,
-                                              std::size_t initial_token_prefix_from_input,
                                               const span_type & token_text,
-                                              const span_type & input,
-                                              const input_state_type & input_state);
+                                              input_state_type & input_state);
 
             /** degenerate version of assemble_token() on reaching end-of-file **/
             static result_type assemble_final_token(const span_type & token_text,
@@ -136,35 +129,14 @@ namespace xo {
              *
              *  @return {parsed token, consumed span}
              **/
-            result_type scan(const span_type & input);
-
-            /** When eof is false, same as scan(input).
-             *  When eof is true and scan(input) does not report a token,
-             *  return notify_eof()
-             **/
-            result_type scan2(const span_type & input, bool eof);
-
-            /** @retval span with @p consumed permanently removed from @p input.
-             *
-             *  Purpose of this method is to update @ref current_pos_.
-             **/
-            span_type consume(const span_type & consumed, const span_type & input);
+            result_type scan(const span_type & input,
+                             bool eof_flag);
 
             /** discard current line after error.  Just cleans up error-reporting state **/
             void discard_current_line();
 
-            /** notify end of input,  resolving any ambiguous input stashed in .prefix
-             **/
-            result_type notify_eof(const span_type & input);
-
             ///@}
 
-        private:
-            result_type scan_completion(const span_type & whitespace,
-                                        const CharT* token_end,
-                                        const span_type & input,
-                                        const input_state_type & input_state);
-
         private:
             /** @defgroup tokenizer-instance-vars tokenizer instance variables **/
             ///@{
@@ -283,19 +255,16 @@ namespace xo {
         template <typename CharT>
         auto
         tokenizer<CharT>::assemble_token(std::size_t initial_whitespace,
-                                         std::size_t initial_token_prefix_from_input,
                                          const span_type & token_text,
-                                         const span_type & input,
-                                         const input_state_type & input_state) -> result_type
+                                         input_state_type & input_state_ref) -> result_type
         {
             /* literal|pretty|streamlined */
             log_config::style = function_style::streamlined;
 
-            scope log(XO_DEBUG(input_state.debug_flag()));
+            scope log(XO_DEBUG(input_state_ref.debug_flag()));
             log && log(xtag("token_text", token_text),
                        xtag("initial_whitespace", initial_whitespace),
-                       xtag("initial_token_prefix_from_input", initial_token_prefix_from_input),
-                       xtag("input", input));
+                       xtag("input_state", input_state_ref));
 
             tokentype tk_type = tokentype::tk_invalid;
             std::string tk_text;
@@ -394,17 +363,19 @@ namespace xo {
                                 return result_type::make_error
                                     (error_type(__FUNCTION__ /*src_function*/,
                                                 "improperly placed sign indicator",
-                                                input_state,
+                                                input_state_ref,
                                                 (ix - tk_start)
-                                        ));
+                                                ),
+                                     input_state_ref);
                             }
                         } else if (*ix == '.') {
                             if (period_flag) {
                                 return result_type::make_error
                                     (error_type(__FUNCTION__ /*src_function*/,
                                                 "duplicate decimal point in numeric literal",
-                                                input_state,
-                                                (ix - tk_start)));
+                                                input_state_ref,
+                                                (ix - tk_start)),
+                                     input_state_ref);
                             }
 
                             period_flag = true;
@@ -413,8 +384,9 @@ namespace xo {
                                 return result_type::make_error
                                     (error_type(__FUNCTION__ /*src_function*/,
                                                 "duplicate exponent marker in numeric literal",
-                                                input_state,
-                                                (ix - tk_start)));
+                                                input_state_ref,
+                                                (ix - tk_start)),
+                                     input_state_ref);
                             }
 
                             exponent_flag = true;
@@ -429,8 +401,9 @@ namespace xo {
                             return result_type::make_error
                                 (error_type(__FUNCTION__ /*src_function*/,
                                             "unexpected character in numeric constant" /*error_description*/,
-                                            input_state,
-                                            (ix - tk_start)));
+                                            input_state_ref,
+                                            (ix - tk_start)),
+                                 input_state_ref);
                         }
                     }
 
@@ -532,8 +505,9 @@ namespace xo {
                             return result_type::make_error
                                 (error_type(__FUNCTION__ /*src_function*/,
                                             "expecting key following escape character \\",
-                                            input_state,
-                                            (ix - tk_start)));
+                                            input_state_ref,
+                                            (ix - tk_start)),
+                                 input_state_ref);
                         }
 
                         switch(*ix) {
@@ -561,8 +535,9 @@ namespace xo {
                             return result_type::make_error
                                 (error_type(__FUNCTION__ /*src_function*/,
                                             "expecting one of n|r|\"|\\ following escape \\",
-                                            input_state,
-                                            (ix - tk_start)));
+                                            input_state_ref,
+                                            (ix - tk_start)),
+                                 input_state_ref);
                         }
                         break;
                     default:
@@ -578,8 +553,9 @@ namespace xo {
                     return result_type::make_error
                         (error_type(__FUNCTION__ /*src_function*/,
                                     "missing terminating '\"' to complete literal string",
-                                    input_state,
-                                    (ix - tk_start)));
+                                    input_state_ref,
+                                    (ix - tk_start)),
+                         input_state_ref);
                 }
 
                 log && log(tostr("tokenizer::assemble_token",
@@ -720,8 +696,9 @@ namespace xo {
                 return result_type::make_error
                     (error_type(__FUNCTION__ /*src_function*/,
                                 "illegal input character",
-                                input_state,
-                                (ix - tk_start)));
+                                input_state_ref,
+                                (ix - tk_start)),
+                     input_state_ref);
             }
 
             if ((tk_type == tokentype::tk_i64)
@@ -771,8 +748,11 @@ namespace xo {
                     tk_text.clear();
             }
 
+            /* current_line().prefix(0): report nothing consumed for now;
+             * caller must preserve current input line until it's entirely exhausted
+             */
             return result_type(token_type(tk_type, std::move(tk_text)),
-                               input.prefix(initial_whitespace + initial_token_prefix_from_input));
+                               input_state_ref.current_line().prefix(0));
         } /*assemble_token*/
 
         /* TODO: input_state_ as argument ? */
@@ -782,67 +762,44 @@ namespace xo {
                                                const input_state_type & input_state) -> result_type
         {
             return assemble_token(0 /*initial_whitespace*/,
-                                  0 /*initial_token_prefix_from_input*/,
                                   token_text,
-                                  span_type::make_null(),
                                   input_state);
         }
 
-        /* TODO: prefix_, input_state_ as arguments */
         template <typename CharT>
         auto
-        tokenizer<CharT>::scan_completion(const span_type & whitespace,
-                                          const CharT* token_end,
-                                          const span_type & input,
-                                          const input_state_type & input_state) -> result_type {
-
-            auto token_span = input.after_prefix(whitespace).prefix_upto(token_end);
-
-            if (this->prefix_.empty()) {
-                return assemble_token(whitespace.size(),
-                                      token_span.size() /*initial_token_prefix_from_input*/,
-                                      token_span,
-                                      input,
-                                      input_state);
-            } else {
-                /* whatever we stashed in .prefix_, should be consumed from input.
-                 * control here implies reached end of input with either
-                 * - input for which parsing outcome depends on existence of more input,
-                 *   and presence of eof now resolves
-                 * - malformed input (that might represent prefix of a valid token.  Say "#incl" in C)
-                 *
-                 * That means stashed .prefix will represent copied range of characters that
-                 * ends at the same position as input
-                 */
-                return result_type::make_partial(input);
-            }
-
-        }
-
-#ifdef NOT_USING
-        template <typename CharT>
-        void
-        tokenizer<CharT>::capture_current_line(const span_type & input)
-        {
-            this->input_state_.capture_current_line(input);
-        }
-#endif
-
-        template <typename CharT>
-        auto
-        tokenizer<CharT>::scan(const span_type & input) -> result_type
+        tokenizer<CharT>::scan(const span_type & input, bool eof_flag) -> result_type
         {
             scope log(XO_DEBUG(input_state_.debug_flag()));
 
             log && log(xtag("input", input));
 
-            const CharT * ix = this->input_state_.skip_leading_whitespace(input);
+            /* - Always at beginning of token when scan() invoked
+             * - scan will not report any portion of line as consumed until it has
+             *   emitted all tokens in that line.
+             *   rationale: caller is allowed to discard storage that
+             *   scan() reports as consumed. But will be holding that line
+             *   until all tokens have been read.
+             * - this means caller will typically call scan()
+             *   with the same input span multiple times
+             */
+
+            /* automagically no-ops when the same input presented twice */
+            this->input_state_.capture_current_line(input, eof_flag);
+
+            const CharT * ix = this->input_state_.skip_leading_whitespace();
 
             if(ix == input.hi()) {
-                /* no-op */
-                return result_type::make_whitespace(input.prefix_upto(ix));
+                log && log("end input -> consume current line");
+
+                /* entirety of current line has been tokenized
+                 *  -> caller may consume it
+                 */
+                return result_type::make_whitespace(this->input_state_.consume_current_line());
             }
 
+            /* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */
+
             // TODO:
             // 1. hoist complete_flag up here
             // 2. use in each branch
@@ -850,9 +807,9 @@ namespace xo {
 
             /* here: *ix is not whitespace */
 
-            auto whitespace_span = input.prefix_upto(ix);
+            auto whitespace_z = input_state_.whitespace();
 
-            log && log(xtag("whitespace.size", input_state_.whitespace()));
+            log && log(xtag("whitespace_z", whitespace_z));
 
             /* tk_start points to known beginning of token
              * (after any whitespace)
@@ -871,12 +828,15 @@ namespace xo {
 
                 ++ix;
 
+#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit
                 if (ix == input.hi()) {
                     /* need more input to know if/when token complete */
                     this->prefix_ += std::string(tk_start, input.hi());
 
                     log && log(xtag("captured-prefix1", this->prefix_));
-                } else {
+                } else
+#endif
+                    {
                     CharT ch2 = *ix;
 
                     if (((ch2 >= '0') && (ch2 <= '9'))
@@ -909,21 +869,28 @@ namespace xo {
                             break;
                         }
                     } else if ((*ix == '\n') || (*ix == '\r')) {
+                        log && log ("string literal with naked newline or CR");
+
                         return result_type::make_error
                             (error_type(__FUNCTION__ /*src_function*/,
                                         "must use \\n or \\r to encode newline/cr in string literal",
                                         input_state_,
-                                        (ix - tk_start)));
+                                        (ix - tk_start)),
+                             this->input_state_);
                     }
 
                     prev_ch = *ix;
                 }
 
                 if (!complete_flag) {
-                    /* need more input to know if/when token complete */
-                    this->prefix_ += std::string(tk_start, input.hi());
+                    log && log("unterminated string literal");
 
-                    log && log(xtag("captured-prefix2", this->prefix_));
+                    return result_type::make_error
+                               (error_type(__FUNCTION__ /*src_function*/,
+                                           "unterminated string literal",
+                                           input_state_,
+                                           (ix - tk_start)),
+                                this->input_state_);
                 }
             } else {
                 /* ix is start of some token */
@@ -941,8 +908,13 @@ namespace xo {
                             /* include next char and complete token */
                             ++ix;
 
-                            return scan_completion(whitespace_span, ix /*token_end*/, input,
-                                                   this->input_state_);
+                            log && log("complete '->' token");
+
+                            this->input_state_.advance_until(ix);
+
+                            return assemble_token(whitespace_z,
+                                                  span_type(tk_start, ix) /*token*/,
+                                                  input_state_);
                         }
 
                         /* here: -123, -.5e-21 for example */
@@ -959,9 +931,14 @@ namespace xo {
                         CharT ch2 = *ix;
 
                         if (ch2 != '=') {
+                            log && log("complete '>=' token");
+
+                            this->input_state_.advance_until(ix);
+
                             /* ignore next char and complete token */
-                            return scan_completion(whitespace_span, ix /*token_end*/, input,
-                                                   this->input_state_);
+                            return assemble_token(whitespace_z,
+                                                  span_type(tk_start, ix) /*token*/,
+                                                  this->input_state_);
                         }
 
                         /* here: >= for example */
@@ -1003,18 +980,28 @@ namespace xo {
                     }
                 }
 
+#ifdef OBSOLETE
                 if (ix == input.hi()) {
                     /* need more input to know if/when token complete */
                     this->prefix_ += std::string(tk_start, input.hi());
 
                     log && log(xtag("captured-prefix5", this->prefix_));
                 }
+#endif
             }
 
-            return scan_completion(whitespace_span, ix /*token_end*/, input,
-                                   this->input_state_);
+            log && log("assemble token z", xtag("token_z", ix - tk_start));
+
+            assert(tk_start < ix);
+
+            this->input_state_.advance_until(ix);
+
+            return assemble_token(whitespace_z,
+                                  span_type(tk_start, ix) /*token*/,
+                                  this->input_state_);
         } /*scan*/
 
+#ifdef OBSOLETE
         template <typename CharT>
         auto
         tokenizer<CharT>::scan2(const span_type & input, bool eof) -> result_type {
@@ -1039,15 +1026,19 @@ namespace xo {
                                span_type::concat(sr.consumed(), sr2.consumed()),
                                sr2.error());
         }
+#endif
 
+#ifdef OBSOLETE
         template <typename CharT>
         auto
-        tokenizer<CharT>::consume(const span_type & consumed, const span_type & input) -> span_type
+        tokenizer<CharT>::consume(const span_type & consumed,
+                                  const span_type & input) -> span_type /* OBSOLETE-only; class declaration removed */
         {
             this->input_state_.consume(consumed.size());
 
             return input.after_prefix(consumed);
         }
+#endif
 
         template <typename CharT>
         void
@@ -1056,6 +1047,7 @@ namespace xo {
             this->input_state_.discard_current_line();
         }
 
+#ifdef OBSOLETE
         template <typename CharT>
         auto
         tokenizer<CharT>::notify_eof(const span_type & input) -> result_type {
@@ -1063,20 +1055,12 @@ namespace xo {
 
             log && log(xtag("prefix_", prefix_), xtag("prefix_.size", prefix_.size()), xtag("input", input));
 
-            if (this->prefix_.empty()) {
-                /* almost meretricious to include input here,
-                 * when called from scan2() it can only be whitespace
-                 */
-                return result_type::make_whitespace(input);
-            } else {
-                auto retval = assemble_final_token(span_type::from_string(prefix_),
-                                                   this->input_state_);
-
-                this->prefix_.clear();
-
-                return retval;
-            }
+            /* almost meretricious to include input here,
+             * when called from scan2() it can only be whitespace
+             */
+            return result_type::make_whitespace(input);
         } /*notify_eof*/
+#endif
     } /*namespace scm*/
 } /*namespace xo*/
 
diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
index ebcf2a0f..6a673e53 100644
--- a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
+++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp
@@ -121,22 +121,22 @@ namespace xo {
 
             if (!error_description_.empty()) {
                 const char * prefix = "input: ";
-                /* input_state.current_pos: position of first character following preceding token.
-                 * input_state.whitespace:  whitespace between current_pos and start of failing token
+                /* input_state.tk_start:    position of first character in token
+                 * input_state.current_pos: position of first character following preceding token.
                  * error_pos:               position (relative to start) at which failure detected
                  */
-                const size_t tk_start = input_state_.current_pos() + input_state_.whitespace();
+                const size_t tk_start = input_state_.tk_start();
                 const size_t tk_indent = (strlen(prefix) + tk_start);
                 const size_t error_pos = 1 + tk_start + error_pos_;
 
-                os << "char: " << error_pos << endl;
+                os << "token col: " << tk_start << ", error col: " << error_pos << "\n";
                 os << prefix;
                 for (const char *p = input_state_.current_line().lo(),
                          *e = input_state_.current_line().hi(); p < e; ++p)
                 {
                     os << *p;
                 }
-                os << endl;
+                //os << endl;
                 os << std::setw(tk_indent) << " ";
 
                 for (size_t i = 0; i < error_pos_; ++i) {
diff --git a/xo-tokenizer/utest/tokenizer.test.cpp b/xo-tokenizer/utest/tokenizer.test.cpp
index ec7d394f..604b9d25 100644
--- a/xo-tokenizer/utest/tokenizer.test.cpp
+++ b/xo-tokenizer/utest/tokenizer.test.cpp
@@ -232,7 +232,7 @@ namespace xo {
                         in_span(testcase.input_.c_str(),
                                 testcase.input_.c_str() + testcase.input_.size());
 
-                    auto sr = tkz.scan2(in_span, true /*eof*/);
+                    auto sr = tkz.scan(in_span, true /*eof*/);
 
                     REHEARSE(rh, sr.get_token().tk_type() == testcase.expected_tk_.tk_type());
                     if (sr.get_token().tk_type() == tokentype::tk_i64)
@@ -408,7 +408,7 @@ namespace xo {
                     {
                         log && log(xtag("i_tk", i_tk));
 
-                        auto sr = tkz.scan2(in_span, in_span.empty());
+                        auto sr = tkz.scan(in_span, in_span.empty());
                         const auto & tk = sr.get_token();
 
                         if (tk.is_valid()) {
@@ -454,6 +454,8 @@ namespace xo {
             make_testcase(const char * input, const char * src_function, const char * error_descr,
                           size_t tk_start, size_t whitespace, size_t error_pos)
             {
+                size_t line_no = 1;
+
                 testcase_error retval;
                 retval.input_ = input;
                 retval.expect_error_ = tkz_error_type(src_function, error_descr,
@@ -548,7 +550,7 @@ namespace xo {
 
                         auto in_span = tokenizer::span_type::from_string(testcase.input_);
 
-                        auto sr = tkz.scan2(in_span, true /*eof*/);
+                        auto sr = tkz.scan(in_span, true /*eof*/);
 
                         REHEARSE(rh, sr.is_error());