xo-tokenizer: streamline error path during tokenization

This commit is contained in:
Roland Conybeare 2025-11-22 23:06:51 -05:00
commit 2f7155e57b
3 changed files with 75 additions and 61 deletions

View file

@ -64,10 +64,10 @@ namespace xo {
// 5.
byte * base = reinterpret_cast<byte *>(::mmap(nullptr,
z + c_hugepage_z,
PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0));
z + c_hugepage_z,
PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0));
log && log("acquired memory [lo,hi) using mmap",
xtag("lo", base),
@ -101,7 +101,13 @@ namespace xo {
::munmap(aligned_hi, suffix); // 7.
}
#ifdef __linux__
::madvise(aligned_base, z, MADV_HUGEPAGE); // 8.
#endif
// TODO: for OSX -> the madvise(MADV_HUGEPAGE) hint above is only applied
// on linux, so something else is needed here. Candidates to investigate:
// MAP_ALIGNED_SUPER with mmap() and/or mach_vm_allocate()
//
this->lo_ = aligned_base;
this->committed_z_ = 0;

View file

@ -42,8 +42,17 @@ namespace xo {
static scan_result make_whitespace(const span_type & prefix_input);
static scan_result make_partial(const span_type & prefix_input);
static scan_result make_error(const error_type & error,
input_state_type & input_state_ref);
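/**
* Build an error result and consume the current input line.
*
* @p error_src name of the function where the error was generated;
* callers can pass __FUNCTION__.
* @p error_msg error message
* @p error_pos error position, relative to start of token
* @p input_state_ref input state object. A copy, taken before the line is
* consumed, is stored in the error carried by the returned scan_result;
* consume_current_line() is then called on input_state_ref itself.
**/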
static scan_result make_error_consume_current_line(const char * error_src,
std::string error_msg,
size_t error_pos,
input_state_type & input_state_ref);
bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
bool is_token() const { return token_.is_valid(); }
@ -78,14 +87,23 @@ namespace xo {
}
template <typename CharT>
auto scan_result<CharT>::make_error(const error_type & error,
input_state_type & input_state_ref) -> scan_result
auto
scan_result<CharT>::make_error_consume_current_line(const char * error_src,
std::string error_msg,
size_t error_pos,
input_state_type & input_state_ref) -> scan_result
{
/* report+consume entire input line */
/* copy before altered by .consume_current_line() */
input_state_type input_state_copy = input_state_ref;
return scan_result(token_type::invalid(),
input_state_ref.consume_current_line(),
error);
error_type(error_src,
error_msg,
input_state_copy,
error_pos));
}
} /*namespace scm*/

View file

@ -360,32 +360,28 @@ namespace xo {
} else if (exponent_flag && !exponent_digit_flag) {
exponent_sign_flag = true;
} else {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"improperly placed sign indicator",
input_state_ref,
(ix - tk_start)
),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"improperly placed sign indicator",
(ix - tk_start),
input_state_ref);
}
} else if (*ix == '.') {
if (period_flag) {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"duplicate decimal point in numeric literal",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"duplicate decimal point in numeric literal",
(ix - tk_start),
input_state_ref);
}
period_flag = true;
} else if ((*ix == 'e') || (*ix == 'E')) {
if (exponent_flag) {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"duplicate exponent marker in numeric literal",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"duplicate exponent marker in numeric literal",
(ix - tk_start),
input_state_ref);
}
@ -398,11 +394,10 @@ namespace xo {
number_flag = true;
}
} else {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"unexpected character in numeric constant" /*error_description*/,
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"unexpected character in numeric constant" /*error_description*/,
(ix - tk_start),
input_state_ref);
}
}
@ -502,11 +497,10 @@ namespace xo {
++ix;
if (ix == token_text.hi()) {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"expecting key following escape character \\",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"expecting key following escape character \\",
(ix - tk_start),
input_state_ref);
}
@ -532,11 +526,10 @@ namespace xo {
tk_text.push_back('"');
break;
default:
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"expecting one of n|r|\"|\\ following escape \\",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"expecting one of n|r|\"|\\ following escape \\",
(ix - tk_start),
input_state_ref);
}
break;
@ -550,11 +543,10 @@ namespace xo {
}
if (!endofstring) {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"missing terminating '\"' to complete literal string",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"missing terminating '\"' to complete literal string",
(ix - tk_start),
input_state_ref);
}
@ -693,11 +685,10 @@ namespace xo {
}
if (tk_type == tokentype::tk_invalid) {
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"illegal input character",
input_state_ref,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"illegal input character",
(ix - tk_start),
input_state_ref);
}
@ -768,7 +759,8 @@ namespace xo {
template <typename CharT>
auto
tokenizer<CharT>::scan(const span_type & input, bool eof_flag) -> result_type
tokenizer<CharT>::scan(const span_type & input,
bool eof_flag) -> result_type
{
scope log(XO_DEBUG(input_state_.debug_flag()));
@ -871,11 +863,10 @@ namespace xo {
} else if ((*ix == '\n') || (*ix == '\r')) {
log && log ("string literal with naked newline or CR");
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"must use \\n or \\r to encode newline/cr in string literal",
input_state_,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"must use \\n or \\r to encode newline/cr in string literal",
(ix - tk_start),
this->input_state_);
}
@ -885,11 +876,10 @@ namespace xo {
if (!complete_flag) {
log && log("unterminated string literal");
return result_type::make_error
(error_type(__FUNCTION__ /*src_function*/,
"unterminated string literal",
input_state_,
(ix - tk_start)),
return result_type::make_error_consume_current_line
(__FUNCTION__ /*src_function*/,
"unterminated string literal",
(ix - tk_start),
this->input_state_);
}
} else {