xo-tokenizer: refactor to correct accounting for line/consume/errpos
This commit is contained in:
parent
7f1afac903
commit
84c5a75b28
9 changed files with 501 additions and 243 deletions
|
|
@ -18,11 +18,11 @@ namespace xo {
|
||||||
* allocation order:
|
* allocation order:
|
||||||
* ----------------------->
|
* ----------------------->
|
||||||
*
|
*
|
||||||
* <----------------- .size() ------------------>
|
* <----------------- .size(), .reserved() --------------------------->
|
||||||
* <----------------- .committed() --------------->
|
* <----------------- .committed() ------------->
|
||||||
*
|
*
|
||||||
* <-------allocated------><--------free--------> <---uncommitted---->
|
* <-------allocated------><--------free--------><-----uncommitted---->
|
||||||
* XXXXXXXXXXXXXXXXXXXXXXXX______________________ ....................
|
* XXXXXXXXXXXXXXXXXXXXXXXX______________________......................
|
||||||
* ^ ^ ^ ^ ^
|
* ^ ^ ^ ^ ^
|
||||||
* lo checkpoint free limit hi
|
* lo checkpoint free limit hi
|
||||||
*
|
*
|
||||||
|
|
@ -31,12 +31,77 @@ namespace xo {
|
||||||
* > < .before_checkpoint()
|
* > < .before_checkpoint()
|
||||||
* > < .after_checkpoint()
|
* > < .after_checkpoint()
|
||||||
*
|
*
|
||||||
|
* lifetime:
|
||||||
|
*
|
||||||
|
* 1. initial state after ctor
|
||||||
|
*
|
||||||
|
* >< committed()=0
|
||||||
|
* <---------------------------uncommitted---------------------------->
|
||||||
|
* ....................................................................
|
||||||
|
* ^ ^
|
||||||
|
* lo hi
|
||||||
|
* checkpoint
|
||||||
|
* free
|
||||||
|
* limit
|
||||||
|
*
|
||||||
|
* 1a. one call to ::mmap()
|
||||||
|
* 1b. vm address space [lo,hi) is reserved
|
||||||
|
* 1c. address space [lo,hi) is inaccessible. no read|write|execute permission
|
||||||
|
*
|
||||||
|
* 2. after first allocation of n bytes
|
||||||
|
*
|
||||||
|
* <--committed--->
|
||||||
|
* <--free--><--------------------uncommitted-------------------->
|
||||||
|
* > <- allocated
|
||||||
|
* XXXXXX__________.....................................................
|
||||||
|
* ^ ^ ^ ^
|
||||||
|
* lo lo+n limit hi
|
||||||
|
* ^ free
|
||||||
|
* checkpoint
|
||||||
|
*
|
||||||
|
* 2a. committed just enough hugepages (2mb each) to accommodate n,
|
||||||
|
* i.e. expand-on-demand:
|
||||||
|
* - one call to ::mprotect()
|
||||||
|
* - .limit = .lo + (k+1) * .hugepage_z for some integer k>=0
|
||||||
|
* - k * .hugepage_z < n <= (k+1) * .hugepage_z
|
||||||
|
* 2b. expect immediate cost 1-5us, includes:
|
||||||
|
* - TLB flush
|
||||||
|
* invalidate TLB entries for committed range on all cores that this
|
||||||
|
* process' threads have run on since process inception.
|
||||||
|
* Also, if a kernel thread has run on one of said cores, it may
|
||||||
|
* have borrowed our TLB entries
|
||||||
|
* - page table update
|
||||||
|
* write to entry for each vm page
|
||||||
|
* - kernel overhead 100-1000 cycles (< 1us)
|
||||||
|
* 2c. expect deferred cost 1us-2us per hugepage:
|
||||||
|
* - committed pages aren't backed by physical memory until
|
||||||
|
* first touched; minor page fault on first access for each page.
|
||||||
|
* - so about 256-512us per 256 pages touched (1MB of 4KB pages, or 512MB of 2MB hugepages)
|
||||||
|
* 3. after .expand(z)
|
||||||
|
*
|
||||||
|
* <-------------committed------------>
|
||||||
|
* <------------free------------><----------uncommitted---------->
|
||||||
|
* > <- allocated
|
||||||
|
* XXXXXX______________________________.................................
|
||||||
|
* ^ ^ ^ ^
|
||||||
|
* lo lo+n limit hi
|
||||||
|
* ^ free
|
||||||
|
* checkpoint
|
||||||
|
*
|
||||||
|
* 3a. same as case 2. but without advancing .free pointer.
|
||||||
|
*
|
||||||
|
* 4. after dtor
|
||||||
|
*
|
||||||
|
* 4a. all memory returned to o/s, no longer reserved.
|
||||||
|
* - one call to ::munmap()
|
||||||
|
*
|
||||||
* @endtext
|
* @endtext
|
||||||
*
|
*
|
||||||
* Design Notes:
|
* Design Notes:
|
||||||
* - non-copyable, non-moveable
|
* - non-copyable, non-moveable
|
||||||
* - always heap-allocated
|
|
||||||
* - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_
|
* - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_
|
||||||
|
* - memory for ArenaAlloc itself (not the memory it allocates), ~100 bytes
|
||||||
|
* always heap allocated. Use ArenaAlloc::make()
|
||||||
* - memory obtained from mmap(), not heap
|
* - memory obtained from mmap(), not heap
|
||||||
* - memory addresses are stable. Expand storage by committing VM pages.
|
* - memory addresses are stable. Expand storage by committing VM pages.
|
||||||
* - @ref lo_ is aligned on VM page size (guaranteed by mmap())
|
* - @ref lo_ is aligned on VM page size (guaranteed by mmap())
|
||||||
|
|
@ -55,7 +120,7 @@ namespace xo {
|
||||||
|
|
||||||
/** Create allocator with capacity @p z,
|
/** Create allocator with capacity @p z,
|
||||||
* Reserve memory addresses for @p z bytes,
|
* Reserve memory addresses for @p z bytes,
|
||||||
* but don't commit them until needed
|
* (but don't commit them until needed)
|
||||||
**/
|
**/
|
||||||
static up<ArenaAlloc> make(const std::string & name,
|
static up<ArenaAlloc> make(const std::string & name,
|
||||||
std::size_t z,
|
std::size_t z,
|
||||||
|
|
@ -127,7 +192,12 @@ namespace xo {
|
||||||
std::string name_;
|
std::string name_;
|
||||||
|
|
||||||
/** size of a VM page (from getpagesize()) **/
|
/** size of a VM page (from getpagesize()) **/
|
||||||
std::size_t page_z_;
|
std::size_t page_z_ = 0;
|
||||||
|
|
||||||
|
/** size of a huge VM page. hardwiring this in ctor (to 2MB).
|
||||||
|
* larger pages relieve pressure on TLB, but suboptimal if use << 2MB
|
||||||
|
**/
|
||||||
|
std::size_t hugepage_z_ = 0;
|
||||||
|
|
||||||
/** allocator owns memory in range [@ref lo_, @ref hi_) **/
|
/** allocator owns memory in range [@ref lo_, @ref hi_) **/
|
||||||
std::byte * lo_ = nullptr;
|
std::byte * lo_ = nullptr;
|
||||||
|
|
@ -139,7 +209,7 @@ namespace xo {
|
||||||
* older (addresses below checkpoint)
|
* older (addresses below checkpoint)
|
||||||
* and younger (addresses above checkpoint)
|
* and younger (addresses above checkpoint)
|
||||||
**/
|
**/
|
||||||
std::byte * checkpoint_;
|
std::byte * checkpoint_ = nullptr;
|
||||||
/** free pointer. memory in range [@ref free_, @ref limit_) available **/
|
/** free pointer. memory in range [@ref free_, @ref limit_) available **/
|
||||||
std::byte * free_ptr_ = nullptr;
|
std::byte * free_ptr_ = nullptr;
|
||||||
/** soft limit: end of committed virtual memory **/
|
/** soft limit: end of committed virtual memory **/
|
||||||
|
|
|
||||||
|
|
@ -13,37 +13,101 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
|
using std::byte;
|
||||||
|
|
||||||
namespace gc {
|
namespace gc {
|
||||||
|
namespace {
|
||||||
|
/* alignment better be a power of 2 */
|
||||||
|
std::size_t
|
||||||
|
align_lub(std::size_t x, std::size_t align)
|
||||||
|
{
|
||||||
|
/* e.g:
|
||||||
|
* align = 4096, x%align = 100 -> dx = 3996
|
||||||
|
* align = 4096, x%align = 0 -> dx = 0
|
||||||
|
*/
|
||||||
|
std::size_t dx = (align - (x % align)) % align;
|
||||||
|
|
||||||
|
return x + dx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ArenaAlloc::ArenaAlloc(const std::string & name,
|
ArenaAlloc::ArenaAlloc(const std::string & name,
|
||||||
std::size_t z, bool debug_flag)
|
std::size_t z,
|
||||||
|
bool debug_flag)
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(debug_flag), xtag("name", name));
|
scope log(XO_DEBUG(debug_flag), xtag("name", name));
|
||||||
|
|
||||||
|
constexpr size_t c_hugepage_z = 2 * 1024 * 1024;
|
||||||
|
|
||||||
this->name_ = name;
|
this->name_ = name;
|
||||||
this->page_z_ = getpagesize();
|
this->page_z_ = getpagesize();
|
||||||
|
this->hugepage_z_ = c_hugepage_z;
|
||||||
|
|
||||||
// reserve virtual memory
|
// 1. need k pagetable entries where k is min {k | k * .page_z >= z}
|
||||||
|
// 2. base will be aligned with .page_z but likely not with .hugepage_z
|
||||||
|
// 3. bad to have misalignment, because misaligned {prefix, suffix} of [base, base+z)
|
||||||
|
// will use 4k pages instead of 2mb pages
|
||||||
|
//
|
||||||
|
// strategy:
|
||||||
|
// 4. round up z to multiple of c_hugepage_z
|
||||||
|
// 5. over-request so reserved range contains an aligned subrange of size z
|
||||||
|
// 6. unmap misaligned prefix
|
||||||
|
// 7. unmap misaligned suffix.
|
||||||
|
// 8. enable huge pages for now-aligned remainder of reserved range
|
||||||
|
//
|
||||||
|
// Z. note: rejecting inferior MAP_HUGETLB|MAP_HUGE_2MB flags on ::mmap here:
|
||||||
|
// Za. requires previously-reserved memory in /proc/sys/vm/nr_hugepages
|
||||||
|
// Zb. reserved pages permanently resident in RAM, never swapped
|
||||||
|
// Zc. memory cost incurred even if no application is using said pages
|
||||||
|
|
||||||
void * base = mmap(nullptr, z, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
z = align_lub(z, c_hugepage_z); // 4.
|
||||||
|
|
||||||
|
// 5.
|
||||||
|
byte * base = reinterpret_cast<byte *>(::mmap(nullptr,
|
||||||
|
z + c_hugepage_z,
|
||||||
|
PROT_NONE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||||
|
-1, 0));
|
||||||
|
|
||||||
log && log("acquired memory [lo,hi) using mmap",
|
log && log("acquired memory [lo,hi) using mmap",
|
||||||
xtag("lo", base),
|
xtag("lo", base),
|
||||||
xtag("z", z),
|
xtag("z", z),
|
||||||
xtag("hi", reinterpret_cast<std::byte *>(base) + z));
|
xtag("hi", reinterpret_cast<byte *>(base) + z));
|
||||||
|
|
||||||
// could use this as fallback..
|
|
||||||
//base = (new std::byte [z]);
|
|
||||||
|
|
||||||
if (base == MAP_FAILED) {
|
if (base == MAP_FAILED) {
|
||||||
throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed",
|
throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed",
|
||||||
xtag("size", z)));
|
xtag("size", z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
this->lo_ = reinterpret_cast<std::byte *>(base);
|
byte * aligned_base = reinterpret_cast<byte *>(align_lub(reinterpret_cast<size_t>(base),
|
||||||
|
c_hugepage_z));
|
||||||
|
|
||||||
|
assert(reinterpret_cast<size_t>(aligned_base) % c_hugepage_z == 0);
|
||||||
|
assert(aligned_base >= base);
|
||||||
|
assert(aligned_base < base + c_hugepage_z);
|
||||||
|
|
||||||
|
if (base < aligned_base) {
|
||||||
|
size_t prefix = aligned_base - base;
|
||||||
|
|
||||||
|
::munmap(base, prefix); // 6.
|
||||||
|
}
|
||||||
|
|
||||||
|
byte * aligned_hi = aligned_base + z;
|
||||||
|
byte * hi = base + z + c_hugepage_z;
|
||||||
|
|
||||||
|
if (aligned_hi < hi) {
|
||||||
|
size_t suffix = hi - aligned_hi;
|
||||||
|
|
||||||
|
::munmap(aligned_hi, suffix); // 7.
|
||||||
|
}
|
||||||
|
|
||||||
|
::madvise(aligned_base, z, MADV_HUGEPAGE); // 8.
|
||||||
|
|
||||||
|
this->lo_ = aligned_base;
|
||||||
this->committed_z_ = 0;
|
this->committed_z_ = 0;
|
||||||
this->checkpoint_ = lo_;
|
this->checkpoint_ = lo_;
|
||||||
this->free_ptr_ = lo_;
|
this->free_ptr_ = lo_;
|
||||||
this->limit_ = lo_ + z;
|
this->limit_ = lo_;
|
||||||
this->hi_ = lo_ + z;
|
this->hi_ = lo_ + z;
|
||||||
this->debug_flag_ = debug_flag;
|
this->debug_flag_ = debug_flag;
|
||||||
|
|
||||||
|
|
@ -52,7 +116,9 @@ namespace xo {
|
||||||
xtag("size", z)));
|
xtag("size", z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
log && log(xtag("lo", (void*)lo_), xtag("page_z", page_z_));
|
log && log(xtag("lo", (void*)lo_),
|
||||||
|
xtag("page_z", page_z_),
|
||||||
|
xtag("hugepage_z", hugepage_z_));
|
||||||
}
|
}
|
||||||
|
|
||||||
ArenaAlloc::~ArenaAlloc()
|
ArenaAlloc::~ArenaAlloc()
|
||||||
|
|
@ -64,7 +130,7 @@ namespace xo {
|
||||||
if (lo_) {
|
if (lo_) {
|
||||||
log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_));
|
log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_));
|
||||||
|
|
||||||
munmap(lo_, hi_ - lo_);
|
::munmap(lo_, hi_ - lo_);
|
||||||
}
|
}
|
||||||
// could use this as fallback if we dropped the uncommitted technique
|
// could use this as fallback if we dropped the uncommitted technique
|
||||||
//delete [] this->lo_;
|
//delete [] this->lo_;
|
||||||
|
|
@ -86,21 +152,6 @@ namespace xo {
|
||||||
z, debug_flag));
|
z, debug_flag));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
/* alignment better be a power of 2 */
|
|
||||||
std::size_t
|
|
||||||
align_lub(std::size_t x, std::size_t align)
|
|
||||||
{
|
|
||||||
/* e.g:
|
|
||||||
* align = 4096, x%align = 100 -> dx = 3996
|
|
||||||
* align = 4096, x%align = 0 -> dx = 0
|
|
||||||
*/
|
|
||||||
std::size_t dx = (align - (x % align)) % align;
|
|
||||||
|
|
||||||
return x + dx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ArenaAlloc::expand(size_t offset_z)
|
ArenaAlloc::expand(size_t offset_z)
|
||||||
{
|
{
|
||||||
|
|
@ -118,7 +169,7 @@ namespace xo {
|
||||||
xtag("requested", offset_z), xtag("reserved", reserved())));
|
xtag("requested", offset_z), xtag("reserved", reserved())));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t aligned_offset_z = align_lub(offset_z, page_z_);
|
std::size_t aligned_offset_z = align_lub(offset_z, hugepage_z_);
|
||||||
std::byte * commit_start = lo_ + committed_z_;
|
std::byte * commit_start = lo_ + committed_z_;
|
||||||
std::size_t add_commit_z = aligned_offset_z - committed_z_;
|
std::size_t add_commit_z = aligned_offset_z - committed_z_;
|
||||||
|
|
||||||
|
|
@ -130,7 +181,7 @@ namespace xo {
|
||||||
xtag("add_commit_z", add_commit_z),
|
xtag("add_commit_z", add_commit_z),
|
||||||
xtag("commit_end", commit_start + add_commit_z));
|
xtag("commit_end", commit_start + add_commit_z));
|
||||||
|
|
||||||
if (mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
|
if (::mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
|
||||||
throw std::runtime_error(tostr("ArenaAlloc::expand: commit failure",
|
throw std::runtime_error(tostr("ArenaAlloc::expand: commit failure",
|
||||||
xtag("committed_z", committed_z_),
|
xtag("committed_z", committed_z_),
|
||||||
xtag("add_commit_z", add_commit_z)));
|
xtag("add_commit_z", add_commit_z)));
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@
|
||||||
namespace xo {
|
namespace xo {
|
||||||
namespace scm {
|
namespace scm {
|
||||||
reader::reader(bool debug_flag) :
|
reader::reader(bool debug_flag) :
|
||||||
tokenizer_{debug_flag}, parser_{debug_flag}
|
tokenizer_{debug_flag},
|
||||||
|
parser_{debug_flag}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -29,7 +30,7 @@ namespace xo {
|
||||||
}
|
}
|
||||||
|
|
||||||
reader_result
|
reader_result
|
||||||
reader::read_expr(const span_type & input_arg, bool eof)
|
reader::read_expr(const span_type & input_arg, bool eof_flag)
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(this->debug_flag()));
|
scope log(XO_DEBUG(this->debug_flag()));
|
||||||
|
|
||||||
|
|
@ -38,20 +39,25 @@ namespace xo {
|
||||||
/* input text-span consumed by this call.
|
/* input text-span consumed by this call.
|
||||||
* Always comprises some number (possibly 0)
|
* Always comprises some number (possibly 0)
|
||||||
* of complete tokens, along with any leading
|
* of complete tokens, along with any leading
|
||||||
* whitespace
|
* whitespace.
|
||||||
|
*
|
||||||
|
* expr_span may also begin and end part way through
|
||||||
|
* distinct input lines
|
||||||
*/
|
*/
|
||||||
span_type expr_span = input.prefix(0ul);
|
span_type expr_span = input.prefix(0ul);
|
||||||
|
|
||||||
while (!input.empty()) {
|
while (!input.empty()) {
|
||||||
/* each loop iterations reads one token */
|
/* each loop iteration reads one token */
|
||||||
|
|
||||||
/* read one token from input */
|
/* read one token from input.
|
||||||
auto [tk, used_span, error1] = this->tokenizer_.scan2(input, eof);
|
* tokenizer stashes one line at a time, but only
|
||||||
|
* reports in used_span the portion representing the first token.
|
||||||
|
*/
|
||||||
|
auto [tk, used_span, error1] = this->tokenizer_.scan(input, eof_flag);
|
||||||
|
|
||||||
log && log(xtag("consumed", used_span));
|
log && log(xtag("consumed", used_span));
|
||||||
log && log(xtag("input.pre", input));
|
log && log(xtag("input.pre", input));
|
||||||
|
|
||||||
input = this->tokenizer_.consume(used_span, input);
|
|
||||||
expr_span += used_span;
|
expr_span += used_span;
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
if (tk.is_valid()) {
|
||||||
|
|
@ -76,7 +82,7 @@ namespace xo {
|
||||||
expr_span, parser_.stack_size(), reader_error());
|
expr_span, parser_.stack_size(), reader_error());
|
||||||
} else if (parser_result.is_error()) {
|
} else if (parser_result.is_error()) {
|
||||||
/* 1. parser detected error.
|
/* 1. parser detected error.
|
||||||
* 2. tokenizer_.input_state() refers to position just after offending token
|
* 2. tokenizer_.input_state().current_pos refers to position just after offending token
|
||||||
* 3. error_pos here is 0 because error detected at token boundary
|
* 3. error_pos here is 0 because error detected at token boundary
|
||||||
*/
|
*/
|
||||||
reader_error error2(parser_result.error_src_function(),
|
reader_error error2(parser_result.error_src_function(),
|
||||||
|
|
@ -122,7 +128,7 @@ namespace xo {
|
||||||
* 1. input.empty (perhaps ate some whitespace, ok)
|
* 1. input.empty (perhaps ate some whitespace, ok)
|
||||||
* 2. missing or incomplete token (ok unless eof)
|
* 2. missing or incomplete token (ok unless eof)
|
||||||
*/
|
*/
|
||||||
if (eof) {
|
if (eof_flag) {
|
||||||
if (parser_.has_incomplete_expr()) {
|
if (parser_.has_incomplete_expr()) {
|
||||||
throw std::runtime_error
|
throw std::runtime_error
|
||||||
("reader::read_expr"
|
("reader::read_expr"
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,10 @@ main() {
|
||||||
tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info);
|
tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info);
|
||||||
string input_str;
|
string input_str;
|
||||||
|
|
||||||
|
size_t line_no = 1;
|
||||||
|
|
||||||
|
constexpr std::size_t c_maxlines = 25;
|
||||||
|
|
||||||
while (repl_getline(interactive, cin, cout, input_str)) {
|
while (repl_getline(interactive, cin, cout, input_str)) {
|
||||||
// we want tokenizer to see newline, it's syntax
|
// we want tokenizer to see newline, it's syntax
|
||||||
input_str.push_back('\n');
|
input_str.push_back('\n');
|
||||||
|
|
@ -36,7 +40,7 @@ main() {
|
||||||
|
|
||||||
// reminder: input may contain multiple tokens
|
// reminder: input may contain multiple tokens
|
||||||
while (!input.empty()) {
|
while (!input.empty()) {
|
||||||
auto [tk, consumed, error] = tkz.scan(input);
|
auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
if (tk.is_valid()) {
|
||||||
cout << tk << endl;
|
cout << tk << endl;
|
||||||
|
|
@ -47,29 +51,16 @@ main() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
input = tkz.consume(consumed, input);
|
input = input.after_prefix(consumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* here: input.empty() or error encountered */
|
/* here: input.empty() or error encountered */
|
||||||
|
|
||||||
/* discard stashed remainder of input line
|
++line_no;
|
||||||
* (for nicely-formatted errors)
|
|
||||||
*/
|
|
||||||
tkz.discard_current_line();
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
if (line_no > c_maxlines) {
|
||||||
span_type input = span_type::from_string(input_str);
|
cout << "always exit after " << c_maxlines << " lines of input" << endl;
|
||||||
|
break;
|
||||||
auto [tk, consumed, error] = tkz.notify_eof(input);
|
|
||||||
|
|
||||||
input = tkz.consume(consumed, input);
|
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
|
||||||
cout << tk << endl;
|
|
||||||
} else if (error.is_error()) {
|
|
||||||
cout << "parsing error: " << endl;
|
|
||||||
error.report(cout);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,50 @@
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
namespace scm {
|
namespace scm {
|
||||||
|
/** enum to report outcome of @ref capture_current_line **/
|
||||||
|
enum class input_error {
|
||||||
|
/** normal return, input line successfully identified and captured **/
|
||||||
|
ok = 0,
|
||||||
|
/** incomplete input; should not have been submitted to @ref capture_current_line.
|
||||||
|
* note: submit last line of input with eof_flag=true
|
||||||
|
**/
|
||||||
|
incomplete,
|
||||||
|
N
|
||||||
|
};
|
||||||
|
|
||||||
/** @class input_state
|
/** @class input_state
|
||||||
* @brief Track detailed input position for use in error messages
|
* @brief Track detailed input position for use in error messages
|
||||||
*
|
*
|
||||||
|
* input characters fall into two categories:
|
||||||
|
* - consumed: memory can be reclaimed/recycled
|
||||||
|
* - buffered: memory will be retained unaltered until consumed
|
||||||
|
*
|
||||||
|
* remarks:
|
||||||
|
* - always in one of two states:
|
||||||
|
* - empty
|
||||||
|
* - contains exactly one line of input
|
||||||
|
* - also record current input position.
|
||||||
|
* Use this for example to identify where tokenizer rejected input.
|
||||||
|
* - .current_pos advances by one token
|
||||||
|
*
|
||||||
|
* - buffered characters always form a single contiguous range.
|
||||||
|
* - input_state does not own any storage; storage is owned elsewhere
|
||||||
|
*
|
||||||
|
* @text
|
||||||
|
*
|
||||||
|
* <------------------.current_line------------------>
|
||||||
|
* > <-- .whitespace
|
||||||
|
* cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx
|
||||||
|
* ^ ^ ^
|
||||||
|
* .current_line.lo | .current_line.hi
|
||||||
|
* .current_pos
|
||||||
|
*
|
||||||
|
* <----prev_line----> <----current_line---->
|
||||||
|
* > <--whitespace
|
||||||
|
* ppppppppppppppppppp cccccccccccc__TTTTTTTT
|
||||||
|
* ^
|
||||||
|
*
|
||||||
|
* @endtext
|
||||||
**/
|
**/
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
class input_state {
|
class input_state {
|
||||||
|
|
@ -33,8 +74,11 @@ namespace xo {
|
||||||
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
|
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
|
||||||
* Introduced for unit tests, not used in tokenizer.
|
* Introduced for unit tests, not used in tokenizer.
|
||||||
**/
|
**/
|
||||||
explicit input_state(const span<const CharT>& current_line, size_t current_pos, size_t whitespace)
|
explicit input_state(const span<const CharT>& current_line,
|
||||||
: current_line_{current_line}, current_pos_{current_pos}, whitespace_{whitespace} {}
|
size_t current_pos,
|
||||||
|
size_t whitespace) : current_line_{current_line},
|
||||||
|
current_pos_{current_pos},
|
||||||
|
whitespace_{whitespace} {}
|
||||||
|
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
|
|
@ -63,6 +107,7 @@ namespace xo {
|
||||||
#endif
|
#endif
|
||||||
const span_type & current_line() const { return current_line_; }
|
const span_type & current_line() const { return current_line_; }
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
size_t tk_start() const { return tk_start_; }
|
||||||
size_t current_pos() const { return current_pos_; }
|
size_t current_pos() const { return current_pos_; }
|
||||||
size_t whitespace() const { return whitespace_; }
|
size_t whitespace() const { return whitespace_; }
|
||||||
bool debug_flag() const { return debug_flag_; }
|
bool debug_flag() const { return debug_flag_; }
|
||||||
|
|
@ -77,27 +122,65 @@ namespace xo {
|
||||||
**/
|
**/
|
||||||
input_state rewind(std::size_t n) const;
|
input_state rewind(std::size_t n) const;
|
||||||
|
|
||||||
/** Capture prefix of @p input up to first newline **/
|
/** Capture prefix of @p input up to first newline.
|
||||||
void capture_current_line(const span_type & input);
|
* Set read position to start of line.
|
||||||
|
*
|
||||||
|
* Alters:
|
||||||
|
* .current_line
|
||||||
|
* .current_pos
|
||||||
|
*
|
||||||
|
* Return pair comprising error code and input span representing first line
|
||||||
|
* (including trailing newline) from @p input.
|
||||||
|
**/
|
||||||
|
std::pair<input_error, span_type> capture_current_line(const span_type & input,
|
||||||
|
bool eof_flag);
|
||||||
|
|
||||||
|
/** atomically return current line while discarding it from input state
|
||||||
|
*
|
||||||
|
* Alters
|
||||||
|
* .current_line
|
||||||
|
* .current_pos
|
||||||
|
* .whitespace
|
||||||
|
**/
|
||||||
|
span_type consume_current_line();
|
||||||
|
|
||||||
/** Reset input state for start of next line.
|
/** Reset input state for start of next line.
|
||||||
* Expression parser may use this to discard remainder of input line
|
* Expression parser may use this to discard remainder of input line
|
||||||
* after a parsing error.
|
* after a parsing error.
|
||||||
|
*
|
||||||
|
* Alters:
|
||||||
|
* .current_line
|
||||||
|
* .current_pos
|
||||||
|
* .whitespace
|
||||||
**/
|
**/
|
||||||
void discard_current_line();
|
void discard_current_line();
|
||||||
|
|
||||||
/** Add @p z to current position **/
|
/** Advance input position by @p z
|
||||||
void consume(size_t z);
|
|
||||||
|
|
||||||
/** Skip prefix of input comprising whitespace.
|
|
||||||
* Return pointer to first non-whitespace character in @p input,
|
|
||||||
* or @c input.hi if input contains only whitespace.
|
|
||||||
*
|
|
||||||
* if @p input contains any newlines, preserves suffix after last
|
|
||||||
* such newilne in @p current_line_
|
|
||||||
*
|
*
|
||||||
|
* Alters:
|
||||||
|
* .current_pos
|
||||||
**/
|
**/
|
||||||
const CharT * skip_leading_whitespace(const span_type & input);
|
void advance(size_t z);
|
||||||
|
|
||||||
|
/** Advance .current_pos to pos.
|
||||||
|
* Require: pos in @ref current_line_
|
||||||
|
**/
|
||||||
|
void advance_until(const CharT * pos);
|
||||||
|
|
||||||
|
/** Skip prefix of input, starting at current read position,
|
||||||
|
* comprising only whitespace.
|
||||||
|
*
|
||||||
|
* Presume input position is at end of token;
|
||||||
|
* on return @ref whitespace_ counts number of whitespace characters
|
||||||
|
* skipped.
|
||||||
|
*
|
||||||
|
* Return pointer to first non-whitespace character after @ref current_pos_
|
||||||
|
* or @ref current_line_.hi if reached end of buffered line.
|
||||||
|
*
|
||||||
|
* Alters:
|
||||||
|
* .whitespace
|
||||||
|
**/
|
||||||
|
const CharT * skip_leading_whitespace();
|
||||||
|
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
|
|
@ -107,7 +190,9 @@ namespace xo {
|
||||||
|
|
||||||
/** remember current input line. Used only to report errors **/
|
/** remember current input line. Used only to report errors **/
|
||||||
span<const CharT> current_line_ = span<const CharT>();
|
span<const CharT> current_line_ = span<const CharT>();
|
||||||
/** current input position within @ref current_line_ **/
|
/** start of last token within @ref current_line_ **/
|
||||||
|
size_t tk_start_ = 0;
|
||||||
|
/** input position within @ref current_line_ **/
|
||||||
size_t current_pos_ = 0;
|
size_t current_pos_ = 0;
|
||||||
/** number of whitespace chars since end of preceding token,
|
/** number of whitespace chars since end of preceding token,
|
||||||
* or last newline, whichever is less
|
* or last newline, whichever is less
|
||||||
|
|
@ -149,7 +234,7 @@ namespace xo {
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
void
|
void
|
||||||
input_state<CharT>::consume(size_t z) {
|
input_state<CharT>::advance(size_t z) {
|
||||||
scope log(XO_DEBUG(debug_flag_));
|
scope log(XO_DEBUG(debug_flag_));
|
||||||
|
|
||||||
this->current_pos_ += z;
|
this->current_pos_ += z;
|
||||||
|
|
@ -157,6 +242,28 @@ namespace xo {
|
||||||
log && log(xtag("z", z), xtag("current_pos", current_pos_));
|
log && log(xtag("z", z), xtag("current_pos", current_pos_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename CharT>
|
||||||
|
void
|
||||||
|
input_state<CharT>::advance_until(const CharT * pos) {
|
||||||
|
scope log(XO_DEBUG(debug_flag_));
|
||||||
|
|
||||||
|
assert(current_line_.lo() <= pos && pos < current_line_.hi());
|
||||||
|
|
||||||
|
this->current_pos_ = pos - current_line_.lo();
|
||||||
|
|
||||||
|
log && log(xtag("current_pos", current_pos_));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename CharT>
|
||||||
|
auto
|
||||||
|
input_state<CharT>::consume_current_line() -> span_type {
|
||||||
|
span_type retval = current_line_;
|
||||||
|
|
||||||
|
this->discard_current_line();
|
||||||
|
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
void
|
void
|
||||||
input_state<CharT>::discard_current_line() {
|
input_state<CharT>::discard_current_line() {
|
||||||
|
|
@ -166,10 +273,14 @@ namespace xo {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
void
|
auto
|
||||||
input_state<CharT>::capture_current_line(const span_type & input)
|
input_state<CharT>::capture_current_line(const span_type & input,
|
||||||
|
bool eof_flag) -> std::pair<input_error, span_type>
|
||||||
{
|
{
|
||||||
// see also discard_current_line()
|
// see also discard_current_line()
|
||||||
|
// note: must capture entirety of first line,
|
||||||
|
// for example including leading whitespace.
|
||||||
|
// See discussion in tokenizer scan() method
|
||||||
|
|
||||||
scope log(XO_DEBUG(debug_flag_));
|
scope log(XO_DEBUG(debug_flag_));
|
||||||
|
|
||||||
|
|
@ -177,44 +288,76 @@ namespace xo {
|
||||||
const CharT * sol = input.lo();
|
const CharT * sol = input.lo();
|
||||||
const CharT * eol = sol;
|
const CharT * eol = sol;
|
||||||
|
|
||||||
|
if (sol == current_line_.lo()) {
|
||||||
|
log && log("short-circuit - current line already stashed");
|
||||||
|
|
||||||
|
/* nothing to do here */
|
||||||
|
return std::make_pair(input_error::ok, current_line_);
|
||||||
|
}
|
||||||
|
|
||||||
while ((eol < input.hi()) && (*eol != '\n'))
|
while ((eol < input.hi()) && (*eol != '\n'))
|
||||||
++eol;
|
++eol;
|
||||||
|
|
||||||
|
if (*eol == '\n') {
|
||||||
|
/* include \n at end-of-line */
|
||||||
|
++eol;
|
||||||
|
} else {
|
||||||
|
if (!eof_flag) {
|
||||||
|
/* caller expected to provide complete line of input. complain and ignore */
|
||||||
|
return std::make_pair(input_error::incomplete,
|
||||||
|
input.prefix(0ul));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this->current_line_ = span_type(sol, eol);
|
this->current_line_ = span_type(sol, eol);
|
||||||
this->current_pos_ = 0;
|
this->current_pos_ = 0;
|
||||||
|
this->whitespace_ = 0;
|
||||||
|
|
||||||
log && log(xtag("current_line", print::printspan(current_line_)),
|
log && log(xtag("current_line", print::printspan(current_line_)),
|
||||||
xtag("current_pos", current_pos_));
|
xtag("current_pos", current_pos_));
|
||||||
|
|
||||||
|
return std::make_pair(input_error::ok,
|
||||||
|
span_type(sol, eol));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
const CharT *
|
const CharT *
|
||||||
input_state<CharT>::skip_leading_whitespace(const span_type & input)
|
input_state<CharT>::skip_leading_whitespace()
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(debug_flag_));
|
scope log(XO_DEBUG(debug_flag_));
|
||||||
|
|
||||||
const CharT * ix = input.lo();
|
const CharT * ix = current_line_.lo() + current_pos_;
|
||||||
|
|
||||||
if (this->current_line().is_null()) {
|
|
||||||
this->capture_current_line(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
this->whitespace_ = 0;
|
this->whitespace_ = 0;
|
||||||
|
|
||||||
/* skip whitespace + remember beginning of most recent line */
|
/* skip whitespace within current line, counting skipped characters in whitespace_ */
|
||||||
while (is_whitespace(*ix) && (ix != input.hi())) {
|
while (is_whitespace(*ix) && (ix != current_line_.hi())) {
|
||||||
if (is_newline(*ix)) {
|
++ix;
|
||||||
++ix;
|
|
||||||
|
|
||||||
this->capture_current_line(span_type(ix, input.hi()));
|
++(this->whitespace_);
|
||||||
} else {
|
|
||||||
++ix;
|
|
||||||
|
|
||||||
++(this->whitespace_);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->tk_start_ = ix - current_line_.lo();
|
||||||
|
this->current_pos_ = ix - current_line_.lo();
|
||||||
|
|
||||||
return ix;
|
return ix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename CharT>
|
||||||
|
inline std::ostream &
|
||||||
|
operator<<(std::ostream & os,
|
||||||
|
const input_state<CharT>& x)
|
||||||
|
{
|
||||||
|
using xo::print::unq;
|
||||||
|
|
||||||
|
os << "<input_state"
|
||||||
|
<< xtag("tk", x.tk_start())
|
||||||
|
<< xtag("pos", x.current_pos())
|
||||||
|
<< xtag("line", unq(std::string_view(x.current_line().lo(), x.current_line().hi())))
|
||||||
|
<< xtag("whitespace", x.whitespace())
|
||||||
|
<< ">";
|
||||||
|
|
||||||
|
return os;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
#include "token.hpp"
|
#include "token.hpp"
|
||||||
#include "tokenizer_error.hpp"
|
#include "tokenizer_error.hpp"
|
||||||
|
#include "input_state.hpp"
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
namespace scm {
|
namespace scm {
|
||||||
|
|
@ -17,11 +18,11 @@ namespace xo {
|
||||||
* Possible outcomes fall into several categories
|
* Possible outcomes fall into several categories
|
||||||
* (with T: @c token_.is_valid(), E: @c error_.is_error())
|
* (with T: @c token_.is_valid(), E: @c error_.is_error())
|
||||||
*
|
*
|
||||||
* | T | E | description |
|
* | T | E | description |
|
||||||
* |-------+-------+-------------------|
|
* |-------+-------+-------------------------------------|
|
||||||
* | false | false | end of input |
|
* | false | false | end of input, including end of line |
|
||||||
* | true | false | parsed token in T |
|
* | true | false | parsed token in T |
|
||||||
* | false | true | parse error in E |
|
* | false | true | parse error in E |
|
||||||
*
|
*
|
||||||
* @endcode
|
* @endcode
|
||||||
**/
|
**/
|
||||||
|
|
@ -31,6 +32,7 @@ namespace xo {
|
||||||
using token_type = token<CharT>;
|
using token_type = token<CharT>;
|
||||||
using span_type = span<const CharT>;
|
using span_type = span<const CharT>;
|
||||||
using error_type = tokenizer_error<CharT>;
|
using error_type = tokenizer_error<CharT>;
|
||||||
|
using input_state_type = input_state<CharT>;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
scan_result(const token_type & token,
|
scan_result(const token_type & token,
|
||||||
|
|
@ -40,7 +42,8 @@ namespace xo {
|
||||||
|
|
||||||
static scan_result make_whitespace(const span_type & prefix_input);
|
static scan_result make_whitespace(const span_type & prefix_input);
|
||||||
static scan_result make_partial(const span_type & prefix_input);
|
static scan_result make_partial(const span_type & prefix_input);
|
||||||
static scan_result make_error(const error_type & error);
|
static scan_result make_error(const error_type & error,
|
||||||
|
input_state_type & input_state_ref);
|
||||||
|
|
||||||
bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
|
bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
|
||||||
bool is_token() const { return token_.is_valid(); }
|
bool is_token() const { return token_.is_valid(); }
|
||||||
|
|
@ -51,7 +54,10 @@ namespace xo {
|
||||||
const error_type & error() const { return error_; }
|
const error_type & error() const { return error_; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** successfully parsed token, whenever tk_type != tokentype::tk_invalid **/
|
/** Successfully parsed token, whenever tk_type != tokentype::tk_invalid.
|
||||||
|
* Will be tokentype::tk_invalid in normal course of events for valid input,
|
||||||
|
* when consuming whitespace
|
||||||
|
**/
|
||||||
token_type token_;
|
token_type token_;
|
||||||
/** input span represented by .token, on success. Otherwise not defined **/
|
/** input span represented by .token, on success. Otherwise not defined **/
|
||||||
span_type consumed_;
|
span_type consumed_;
|
||||||
|
|
@ -72,9 +78,14 @@ namespace xo {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto scan_result<CharT>::make_error(const error_type & error) -> scan_result
|
auto scan_result<CharT>::make_error(const error_type & error,
|
||||||
|
input_state_type & input_state_ref) -> scan_result
|
||||||
{
|
{
|
||||||
return scan_result(token_type::invalid(), span_type::make_null(), error);
|
/* report+consume entire input line */
|
||||||
|
|
||||||
|
return scan_result(token_type::invalid(),
|
||||||
|
input_state_ref.consume_current_line(),
|
||||||
|
error);
|
||||||
}
|
}
|
||||||
|
|
||||||
} /*namespace scm*/
|
} /*namespace scm*/
|
||||||
|
|
|
||||||
|
|
@ -99,22 +99,15 @@ namespace xo {
|
||||||
static bool is_2char_punctuation(CharT ch);
|
static bool is_2char_punctuation(CharT ch);
|
||||||
|
|
||||||
/** assemble token from text @p token_text.
|
/** assemble token from text @p token_text.
|
||||||
* @p token_text will often (but not always) represent a subset of @p input.
|
|
||||||
* (For example consider multi-line string literals)
|
|
||||||
* Also the span @p token_text may (in uncommon cases)
|
|
||||||
* have been copied to separate storage from @p input
|
|
||||||
*
|
|
||||||
* @p initial_whitespace Amount of whitespace input being consumed from input.
|
* @p initial_whitespace Amount of whitespace input being consumed from input.
|
||||||
* @p initial_token_prefix_from_input Amount of non-whitespace input being
|
* @p token_text subset of input_line representing a single token.
|
||||||
* consumed from input. Not counting any stashed-and-already-consumed input
|
* @p input_state input state containing input_line
|
||||||
*
|
*
|
||||||
* retval.consumed will represent some possibly-empty prefix of @p input
|
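* On success retval.consumed is an empty prefix of the current input line
* (caller must preserve the line until it is entirely exhausted);
* on error the entire current input line is reported as consumed.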
|
||||||
**/
|
**/
|
||||||
static result_type assemble_token(std::size_t initial_whitespace,
|
static result_type assemble_token(std::size_t initial_whitespace,
|
||||||
std::size_t initial_token_prefix_from_input,
|
|
||||||
const span_type & token_text,
|
const span_type & token_text,
|
||||||
const span_type & input,
|
input_state_type & input_state);
|
||||||
const input_state_type & input_state);
|
|
||||||
|
|
||||||
/** degenerate version of assemble_token() on reaching end-of-file **/
|
/** degenerate version of assemble_token() on reaching end-of-file **/
|
||||||
static result_type assemble_final_token(const span_type & token_text,
|
static result_type assemble_final_token(const span_type & token_text,
|
||||||
|
|
@ -136,35 +129,14 @@ namespace xo {
|
||||||
*
|
*
|
||||||
* @return {parsed token, consumed span}
|
* @return {parsed token, consumed span}
|
||||||
**/
|
**/
|
||||||
result_type scan(const span_type & input);
|
result_type scan(const span_type & input,
|
||||||
|
bool eof_flag);
|
||||||
/** When eof is false, same as scan(input).
|
|
||||||
* When eof is true and scan(input) does not report a token,
|
|
||||||
* return notify_eof()
|
|
||||||
**/
|
|
||||||
result_type scan2(const span_type & input, bool eof);
|
|
||||||
|
|
||||||
/** @retval span with @p consumed permanently removed from @p input.
|
|
||||||
*
|
|
||||||
* Purpose of this method is to update @ref current_pos_.
|
|
||||||
**/
|
|
||||||
span_type consume(const span_type & consumed, const span_type & input);
|
|
||||||
|
|
||||||
/** discard current line after error. Just cleans up error-reporting state **/
|
/** discard current line after error. Just cleans up error-reporting state **/
|
||||||
void discard_current_line();
|
void discard_current_line();
|
||||||
|
|
||||||
/** notify end of input, resolving any ambiguous input stashed in .prefix
|
|
||||||
**/
|
|
||||||
result_type notify_eof(const span_type & input);
|
|
||||||
|
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
private:
|
|
||||||
result_type scan_completion(const span_type & whitespace,
|
|
||||||
const CharT* token_end,
|
|
||||||
const span_type & input,
|
|
||||||
const input_state_type & input_state);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** @defgroup tokenizer-instance-vars tokenizer instance variables **/
|
/** @defgroup tokenizer-instance-vars tokenizer instance variables **/
|
||||||
///@{
|
///@{
|
||||||
|
|
@ -283,19 +255,16 @@ namespace xo {
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto
|
auto
|
||||||
tokenizer<CharT>::assemble_token(std::size_t initial_whitespace,
|
tokenizer<CharT>::assemble_token(std::size_t initial_whitespace,
|
||||||
std::size_t initial_token_prefix_from_input,
|
|
||||||
const span_type & token_text,
|
const span_type & token_text,
|
||||||
const span_type & input,
|
input_state_type & input_state_ref) -> result_type
|
||||||
const input_state_type & input_state) -> result_type
|
|
||||||
{
|
{
|
||||||
/* literal|pretty|streamlined */
|
/* literal|pretty|streamlined */
|
||||||
log_config::style = function_style::streamlined;
|
log_config::style = function_style::streamlined;
|
||||||
|
|
||||||
scope log(XO_DEBUG(input_state.debug_flag()));
|
scope log(XO_DEBUG(input_state_ref.debug_flag()));
|
||||||
log && log(xtag("token_text", token_text),
|
log && log(xtag("token_text", token_text),
|
||||||
xtag("initial_whitespace", initial_whitespace),
|
xtag("initial_whitespace", initial_whitespace),
|
||||||
xtag("initial_token_prefix_from_input", initial_token_prefix_from_input),
|
xtag("input_state", input_state_ref));
|
||||||
xtag("input", input));
|
|
||||||
|
|
||||||
tokentype tk_type = tokentype::tk_invalid;
|
tokentype tk_type = tokentype::tk_invalid;
|
||||||
std::string tk_text;
|
std::string tk_text;
|
||||||
|
|
@ -394,17 +363,19 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"improperly placed sign indicator",
|
"improperly placed sign indicator",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)
|
(ix - tk_start)
|
||||||
));
|
),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
} else if (*ix == '.') {
|
} else if (*ix == '.') {
|
||||||
if (period_flag) {
|
if (period_flag) {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"duplicate decimal point in numeric literal",
|
"duplicate decimal point in numeric literal",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
period_flag = true;
|
period_flag = true;
|
||||||
|
|
@ -413,8 +384,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"duplicate exponent marker in numeric literal",
|
"duplicate exponent marker in numeric literal",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
exponent_flag = true;
|
exponent_flag = true;
|
||||||
|
|
@ -429,8 +401,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"unexpected character in numeric constant" /*error_description*/,
|
"unexpected character in numeric constant" /*error_description*/,
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -532,8 +505,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"expecting key following escape character \\",
|
"expecting key following escape character \\",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch(*ix) {
|
switch(*ix) {
|
||||||
|
|
@ -561,8 +535,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"expecting one of n|r|\"|\\ following escape \\",
|
"expecting one of n|r|\"|\\ following escape \\",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -578,8 +553,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"missing terminating '\"' to complete literal string",
|
"missing terminating '\"' to complete literal string",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
log && log(tostr("tokenizer::assemble_token",
|
log && log(tostr("tokenizer::assemble_token",
|
||||||
|
|
@ -720,8 +696,9 @@ namespace xo {
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"illegal input character",
|
"illegal input character",
|
||||||
input_state,
|
input_state_ref,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
input_state_ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((tk_type == tokentype::tk_i64)
|
if ((tk_type == tokentype::tk_i64)
|
||||||
|
|
@ -771,8 +748,11 @@ namespace xo {
|
||||||
tk_text.clear();
|
tk_text.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* input.prefix(0):
|
||||||
|
* require caller preserves current input line until it's entirely exhausted
|
||||||
|
*/
|
||||||
return result_type(token_type(tk_type, std::move(tk_text)),
|
return result_type(token_type(tk_type, std::move(tk_text)),
|
||||||
input.prefix(initial_whitespace + initial_token_prefix_from_input));
|
input_state_ref.current_line().prefix(0));
|
||||||
} /*assemble_token*/
|
} /*assemble_token*/
|
||||||
|
|
||||||
/* TODO: input_state_ as argument ? */
|
/* TODO: input_state_ as argument ? */
|
||||||
|
|
@ -782,67 +762,44 @@ namespace xo {
|
||||||
const input_state_type & input_state) -> result_type
|
const input_state_type & input_state) -> result_type
|
||||||
{
|
{
|
||||||
return assemble_token(0 /*initial_whitespace*/,
|
return assemble_token(0 /*initial_whitespace*/,
|
||||||
0 /*initial_token_prefix_from_input*/,
|
|
||||||
token_text,
|
token_text,
|
||||||
span_type::make_null(),
|
|
||||||
input_state);
|
input_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: prefix_, input_state_ as arguments */
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto
|
auto
|
||||||
tokenizer<CharT>::scan_completion(const span_type & whitespace,
|
tokenizer<CharT>::scan(const span_type & input, bool eof_flag) -> result_type
|
||||||
const CharT* token_end,
|
|
||||||
const span_type & input,
|
|
||||||
const input_state_type & input_state) -> result_type {
|
|
||||||
|
|
||||||
auto token_span = input.after_prefix(whitespace).prefix_upto(token_end);
|
|
||||||
|
|
||||||
if (this->prefix_.empty()) {
|
|
||||||
return assemble_token(whitespace.size(),
|
|
||||||
token_span.size() /*initial_token_prefix_from_input*/,
|
|
||||||
token_span,
|
|
||||||
input,
|
|
||||||
input_state);
|
|
||||||
} else {
|
|
||||||
/* whatever we stashed in .prefix_, should be consumed from input.
|
|
||||||
* control here implies reached end of input with either
|
|
||||||
* - input for which parsing outcome depends on existence of more input,
|
|
||||||
* and presence of eof now resolves
|
|
||||||
* - malformed input (that might represent prefix of a valid token. Say "#incl" in C)
|
|
||||||
*
|
|
||||||
* That means stashed .prefix will represent copied range of characters that
|
|
||||||
* ends at the same position as input
|
|
||||||
*/
|
|
||||||
return result_type::make_partial(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef NOT_USING
|
|
||||||
template <typename CharT>
|
|
||||||
void
|
|
||||||
tokenizer<CharT>::capture_current_line(const span_type & input)
|
|
||||||
{
|
|
||||||
this->input_state_.capture_current_line(input);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename CharT>
|
|
||||||
auto
|
|
||||||
tokenizer<CharT>::scan(const span_type & input) -> result_type
|
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(input_state_.debug_flag()));
|
scope log(XO_DEBUG(input_state_.debug_flag()));
|
||||||
|
|
||||||
log && log(xtag("input", input));
|
log && log(xtag("input", input));
|
||||||
|
|
||||||
const CharT * ix = this->input_state_.skip_leading_whitespace(input);
|
/* - Always at beginning of token when scan() invoked
|
||||||
|
* - scan will not report any portion of line as consumed until it has
|
||||||
|
* emitted all tokens in that line.
|
||||||
|
* rationale: caller is allowed to discard storage that
|
||||||
|
* scan() reports as consumed. But will be holding that line
|
||||||
|
* until all tokens have been read.
|
||||||
|
* - this means caller will typically call scan()
|
||||||
|
* with the same input span multiple times
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* automagically no-ops when the same input presented twice */
|
||||||
|
this->input_state_.capture_current_line(input, eof_flag);
|
||||||
|
|
||||||
|
const CharT * ix = this->input_state_.skip_leading_whitespace();
|
||||||
|
|
||||||
if(ix == input.hi()) {
|
if(ix == input.hi()) {
|
||||||
/* no-op */
|
log && log("end input -> consume current line");
|
||||||
return result_type::make_whitespace(input.prefix_upto(ix));
|
|
||||||
|
/* entirety of current line has been tokenized
|
||||||
|
* -> caller may consume it
|
||||||
|
*/
|
||||||
|
return result_type::make_whitespace(this->input_state_.consume_current_line());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// 1. hoist complete_flag up here
|
// 1. hoist complete_flag up here
|
||||||
// 2. use in each branch
|
// 2. use in each branch
|
||||||
|
|
@ -850,9 +807,9 @@ namespace xo {
|
||||||
|
|
||||||
/* here: *ix is not whitespace */
|
/* here: *ix is not whitespace */
|
||||||
|
|
||||||
auto whitespace_span = input.prefix_upto(ix);
|
auto whitespace_z = input_state_.whitespace();
|
||||||
|
|
||||||
log && log(xtag("whitespace.size", input_state_.whitespace()));
|
log && log(xtag("whitespace_z", whitespace_z));
|
||||||
|
|
||||||
/* tk_start points to known beginning of token
|
/* tk_start points to known beginning of token
|
||||||
* (after any whitespace)
|
* (after any whitespace)
|
||||||
|
|
@ -871,12 +828,15 @@ namespace xo {
|
||||||
|
|
||||||
++ix;
|
++ix;
|
||||||
|
|
||||||
|
#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit
|
||||||
if (ix == input.hi()) {
|
if (ix == input.hi()) {
|
||||||
/* need more input to know if/when token complete */
|
/* need more input to know if/when token complete */
|
||||||
this->prefix_ += std::string(tk_start, input.hi());
|
this->prefix_ += std::string(tk_start, input.hi());
|
||||||
|
|
||||||
log && log(xtag("captured-prefix1", this->prefix_));
|
log && log(xtag("captured-prefix1", this->prefix_));
|
||||||
} else {
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
CharT ch2 = *ix;
|
CharT ch2 = *ix;
|
||||||
|
|
||||||
if (((ch2 >= '0') && (ch2 <= '9'))
|
if (((ch2 >= '0') && (ch2 <= '9'))
|
||||||
|
|
@ -909,21 +869,28 @@ namespace xo {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if ((*ix == '\n') || (*ix == '\r')) {
|
} else if ((*ix == '\n') || (*ix == '\r')) {
|
||||||
|
log && log ("string literal with naked newline or CR");
|
||||||
|
|
||||||
return result_type::make_error
|
return result_type::make_error
|
||||||
(error_type(__FUNCTION__ /*src_function*/,
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
"must use \\n or \\r to encode newline/cr in string literal",
|
"must use \\n or \\r to encode newline/cr in string literal",
|
||||||
input_state_,
|
input_state_,
|
||||||
(ix - tk_start)));
|
(ix - tk_start)),
|
||||||
|
this->input_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_ch = *ix;
|
prev_ch = *ix;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!complete_flag) {
|
if (!complete_flag) {
|
||||||
/* need more input to know if/when token complete */
|
log && log("unterminated string literal");
|
||||||
this->prefix_ += std::string(tk_start, input.hi());
|
|
||||||
|
|
||||||
log && log(xtag("captured-prefix2", this->prefix_));
|
return result_type::make_error
|
||||||
|
(error_type(__FUNCTION__ /*src_function*/,
|
||||||
|
"unterminated string literal",
|
||||||
|
input_state_,
|
||||||
|
(ix - tk_start)),
|
||||||
|
this->input_state_);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* ix is start of some token */
|
/* ix is start of some token */
|
||||||
|
|
@ -941,8 +908,13 @@ namespace xo {
|
||||||
/* include next char and complete token */
|
/* include next char and complete token */
|
||||||
++ix;
|
++ix;
|
||||||
|
|
||||||
return scan_completion(whitespace_span, ix /*token_end*/, input,
|
log && log("complete '->' token");
|
||||||
this->input_state_);
|
|
||||||
|
this->input_state_.advance_until(ix);
|
||||||
|
|
||||||
|
return assemble_token(whitespace_z,
|
||||||
|
span_type(tk_start, ix) /*token*/,
|
||||||
|
input_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* here: -123, -.5e-21 for example */
|
/* here: -123, -.5e-21 for example */
|
||||||
|
|
@ -959,9 +931,14 @@ namespace xo {
|
||||||
CharT ch2 = *ix;
|
CharT ch2 = *ix;
|
||||||
|
|
||||||
if (ch2 != '=') {
|
if (ch2 != '=') {
|
||||||
|
log && log("complete '>=' token");
|
||||||
|
|
||||||
|
this->input_state_.advance_until(ix);
|
||||||
|
|
||||||
/* ignore next char and complete token */
|
/* ignore next char and complete token */
|
||||||
return scan_completion(whitespace_span, ix /*token_end*/, input,
|
return assemble_token(whitespace_z,
|
||||||
this->input_state_);
|
span_type(tk_start, ix) /*token*/,
|
||||||
|
this->input_state_);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* here: >= for example */
|
/* here: >= for example */
|
||||||
|
|
@ -1003,18 +980,28 @@ namespace xo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef OBSOLETE
|
||||||
if (ix == input.hi()) {
|
if (ix == input.hi()) {
|
||||||
/* need more input to know if/when token complete */
|
/* need more input to know if/when token complete */
|
||||||
this->prefix_ += std::string(tk_start, input.hi());
|
this->prefix_ += std::string(tk_start, input.hi());
|
||||||
|
|
||||||
log && log(xtag("captured-prefix5", this->prefix_));
|
log && log(xtag("captured-prefix5", this->prefix_));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return scan_completion(whitespace_span, ix /*token_end*/, input,
|
log && log("assemble token z", xtag("token_z", ix - tk_start));
|
||||||
this->input_state_);
|
|
||||||
|
assert(tk_start < ix);
|
||||||
|
|
||||||
|
this->input_state_.advance_until(ix);
|
||||||
|
|
||||||
|
return assemble_token(whitespace_z,
|
||||||
|
span_type(tk_start, ix) /*token*/,
|
||||||
|
this->input_state_);
|
||||||
} /*scan*/
|
} /*scan*/
|
||||||
|
|
||||||
|
#ifdef OBSOLETE
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto
|
auto
|
||||||
tokenizer<CharT>::scan2(const span_type & input, bool eof) -> result_type {
|
tokenizer<CharT>::scan2(const span_type & input, bool eof) -> result_type {
|
||||||
|
|
@ -1039,15 +1026,19 @@ namespace xo {
|
||||||
span_type::concat(sr.consumed(), sr2.consumed()),
|
span_type::concat(sr.consumed(), sr2.consumed()),
|
||||||
sr2.error());
|
sr2.error());
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef OBSOLETE
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto
|
auto
|
||||||
tokenizer<CharT>::consume(const span_type & consumed, const span_type & input) -> span_type
|
tokenizer<CharT>::consume(const span_type & consumed,
|
||||||
|
const span_type & input) -> span_type
|
||||||
{
|
{
|
||||||
this->input_state_.consume(consumed.size());
|
this->input_state_.consume(consumed.size());
|
||||||
|
|
||||||
return input.after_prefix(consumed);
|
return input.after_prefix(consumed);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
void
|
void
|
||||||
|
|
@ -1056,6 +1047,7 @@ namespace xo {
|
||||||
this->input_state_.discard_current_line();
|
this->input_state_.discard_current_line();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef OBSOLETE
|
||||||
template <typename CharT>
|
template <typename CharT>
|
||||||
auto
|
auto
|
||||||
tokenizer<CharT>::notify_eof(const span_type & input) -> result_type {
|
tokenizer<CharT>::notify_eof(const span_type & input) -> result_type {
|
||||||
|
|
@ -1063,20 +1055,12 @@ namespace xo {
|
||||||
|
|
||||||
log && log(xtag("prefix_", prefix_), xtag("prefix_.size", prefix_.size()), xtag("input", input));
|
log && log(xtag("prefix_", prefix_), xtag("prefix_.size", prefix_.size()), xtag("input", input));
|
||||||
|
|
||||||
if (this->prefix_.empty()) {
|
/* almost meretricious to include input here,
|
||||||
/* almost meretricious to include input here,
|
* when called from scan2() it can only be whitespace
|
||||||
* when called from scan2() it can only be whitespace
|
*/
|
||||||
*/
|
return result_type::make_whitespace(input);
|
||||||
return result_type::make_whitespace(input);
|
|
||||||
} else {
|
|
||||||
auto retval = assemble_final_token(span_type::from_string(prefix_),
|
|
||||||
this->input_state_);
|
|
||||||
|
|
||||||
this->prefix_.clear();
|
|
||||||
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
} /*notify_eof*/
|
} /*notify_eof*/
|
||||||
|
#endif
|
||||||
} /*namespace scm*/
|
} /*namespace scm*/
|
||||||
} /*namespace xo*/
|
} /*namespace xo*/
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -121,22 +121,22 @@ namespace xo {
|
||||||
|
|
||||||
if (!error_description_.empty()) {
|
if (!error_description_.empty()) {
|
||||||
const char * prefix = "input: ";
|
const char * prefix = "input: ";
|
||||||
/* input_state.current_pos: position of first character following preceding token.
|
/* input_state.tk_start: position of first character in token
|
||||||
* input_state.whitespace: whitespace between current_pos and start of failing token
|
* input_state.current_pos: position of first character following preceding token.
|
||||||
* error_pos: position (relative to start) at which failure detected
|
* error_pos: position (relative to start) at which failure detected
|
||||||
*/
|
*/
|
||||||
const size_t tk_start = input_state_.current_pos() + input_state_.whitespace();
|
const size_t tk_start = input_state_.tk_start();
|
||||||
const size_t tk_indent = (strlen(prefix) + tk_start);
|
const size_t tk_indent = (strlen(prefix) + tk_start);
|
||||||
const size_t error_pos = 1 + tk_start + error_pos_;
|
const size_t error_pos = 1 + tk_start + error_pos_;
|
||||||
|
|
||||||
os << "char: " << error_pos << endl;
|
os << "token col: " << tk_start << ", error col: " << error_pos << "\n";
|
||||||
os << prefix;
|
os << prefix;
|
||||||
for (const char *p = input_state_.current_line().lo(),
|
for (const char *p = input_state_.current_line().lo(),
|
||||||
*e = input_state_.current_line().hi(); p < e; ++p)
|
*e = input_state_.current_line().hi(); p < e; ++p)
|
||||||
{
|
{
|
||||||
os << *p;
|
os << *p;
|
||||||
}
|
}
|
||||||
os << endl;
|
//os << endl;
|
||||||
os << std::setw(tk_indent) << " ";
|
os << std::setw(tk_indent) << " ";
|
||||||
|
|
||||||
for (size_t i = 0; i < error_pos_; ++i) {
|
for (size_t i = 0; i < error_pos_; ++i) {
|
||||||
|
|
|
||||||
|
|
@ -232,7 +232,7 @@ namespace xo {
|
||||||
in_span(testcase.input_.c_str(),
|
in_span(testcase.input_.c_str(),
|
||||||
testcase.input_.c_str() + testcase.input_.size());
|
testcase.input_.c_str() + testcase.input_.size());
|
||||||
|
|
||||||
auto sr = tkz.scan2(in_span, true /*eof*/);
|
auto sr = tkz.scan(in_span, true /*eof*/);
|
||||||
|
|
||||||
REHEARSE(rh, sr.get_token().tk_type() == testcase.expected_tk_.tk_type());
|
REHEARSE(rh, sr.get_token().tk_type() == testcase.expected_tk_.tk_type());
|
||||||
if (sr.get_token().tk_type() == tokentype::tk_i64)
|
if (sr.get_token().tk_type() == tokentype::tk_i64)
|
||||||
|
|
@ -408,7 +408,7 @@ namespace xo {
|
||||||
{
|
{
|
||||||
log && log(xtag("i_tk", i_tk));
|
log && log(xtag("i_tk", i_tk));
|
||||||
|
|
||||||
auto sr = tkz.scan2(in_span, in_span.empty());
|
auto sr = tkz.scan(in_span, in_span.empty());
|
||||||
const auto & tk = sr.get_token();
|
const auto & tk = sr.get_token();
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
if (tk.is_valid()) {
|
||||||
|
|
@ -454,6 +454,8 @@ namespace xo {
|
||||||
make_testcase(const char * input, const char * src_function, const char * error_descr,
|
make_testcase(const char * input, const char * src_function, const char * error_descr,
|
||||||
size_t tk_start, size_t whitespace, size_t error_pos)
|
size_t tk_start, size_t whitespace, size_t error_pos)
|
||||||
{
|
{
|
||||||
|
size_t line_no = 1;
|
||||||
|
|
||||||
testcase_error retval;
|
testcase_error retval;
|
||||||
retval.input_ = input;
|
retval.input_ = input;
|
||||||
retval.expect_error_ = tkz_error_type(src_function, error_descr,
|
retval.expect_error_ = tkz_error_type(src_function, error_descr,
|
||||||
|
|
@ -548,7 +550,7 @@ namespace xo {
|
||||||
|
|
||||||
auto in_span = tokenizer::span_type::from_string(testcase.input_);
|
auto in_span = tokenizer::span_type::from_string(testcase.input_);
|
||||||
|
|
||||||
auto sr = tkz.scan2(in_span, true /*eof*/);
|
auto sr = tkz.scan(in_span, true /*eof*/);
|
||||||
|
|
||||||
REHEARSE(rh, sr.is_error());
|
REHEARSE(rh, sr.is_error());
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue