xo-tokenizer2: use xo-arena DCircularBuffer to buffer input line
This commit is contained in:
parent
b9921d4108
commit
1575f8a147
10 changed files with 106 additions and 359 deletions
|
|
@ -4,9 +4,10 @@ include(CMakeFindDependencyMacro)
|
||||||
|
|
||||||
# note: changes to find_dependency() calls here
|
# note: changes to find_dependency() calls here
|
||||||
# must coordinate with xo_dependency() calls
|
# must coordinate with xo_dependency() calls
|
||||||
# in CMakeLists.txt
|
# in src/tokenizer2/CMakeLists.txt
|
||||||
#
|
#
|
||||||
#find_dependency(xo_flatstring)
|
find_dependency(xo_arena)
|
||||||
|
find_dependency(indentlog)
|
||||||
|
|
||||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
||||||
check_required_components("@PROJECT_NAME@")
|
check_required_components("@PROJECT_NAME@")
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
#include <xo/tokenizer2/Tokenizer.hpp>
|
#include <xo/tokenizer2/Tokenizer.hpp>
|
||||||
#include <xo/tokenizer2/Token.hpp>
|
#include <xo/tokenizer2/Token.hpp>
|
||||||
#include <xo/tokenizer2/tokentype.hpp>
|
#include <xo/tokenizer2/tokentype.hpp>
|
||||||
#include <xo/tokenizer2/span.hpp>
|
#include <xo/arena/span.hpp>
|
||||||
#include <xo/indentlog/log_config.hpp>
|
#include <xo/indentlog/log_config.hpp>
|
||||||
#include <replxx.hxx>
|
#include <replxx.hxx>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
bool replxx_getline(bool interactive,
|
bool replxx_getline(bool interactive,
|
||||||
std::size_t parser_stack_size,
|
std::size_t parser_stack_size,
|
||||||
replxx::Replxx & rx,
|
replxx::Replxx & rx,
|
||||||
std::string& input)
|
const char ** p_input)
|
||||||
{
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
@ -34,40 +34,23 @@ bool replxx_getline(bool interactive,
|
||||||
if (retval) {
|
if (retval) {
|
||||||
//cerr << "got reval->true" << endl;
|
//cerr << "got reval->true" << endl;
|
||||||
|
|
||||||
input = input_cstr;
|
*p_input = input_cstr;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//cerr << "got retval->false" << endl;
|
//cerr << "got retval->false" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
rx.history_add(input);
|
rx.history_add(input_cstr);
|
||||||
|
|
||||||
// we want tokenizer to see newline, it's syntax
|
|
||||||
input.push_back('\n');
|
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OBSOLETE
|
|
||||||
bool repl_getline(bool interactive,
|
|
||||||
std::istream & in,
|
|
||||||
std::ostream & out,
|
|
||||||
std::string & input)
|
|
||||||
{
|
|
||||||
if (interactive) {
|
|
||||||
out << "> ";
|
|
||||||
std::flush(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
return static_cast<bool>(std::getline(in, input));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main() {
|
main() {
|
||||||
using xo::scm::Tokenizer;
|
using xo::scm::Tokenizer;
|
||||||
using xo::scm::span;
|
|
||||||
using xo::scm::operator<<;
|
using xo::scm::operator<<;
|
||||||
|
using xo::mm::CircularBufferConfig;
|
||||||
|
using xo::mm::span;
|
||||||
using replxx::Replxx;
|
using replxx::Replxx;
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
@ -82,36 +65,39 @@ main() {
|
||||||
rx.set_max_history_size(1000);
|
rx.set_max_history_size(1000);
|
||||||
rx.history_load("repl_history.txt");
|
rx.history_load("repl_history.txt");
|
||||||
|
|
||||||
Tokenizer tkz(xo::log_config::min_log_level <= xo::log_level::info);
|
Tokenizer tkz(CircularBufferConfig{.name_ = "tokenrepl-input",
|
||||||
|
.max_capacity_ = 4*1024,
|
||||||
|
.max_captured_span_ = 128},
|
||||||
|
true /*debug_flag*/);
|
||||||
|
|
||||||
string input_str;
|
const char * input_cstr = nullptr;;
|
||||||
|
|
||||||
size_t line_no = 1;
|
size_t line_no = 1;
|
||||||
|
|
||||||
constexpr std::size_t c_maxlines = 25;
|
constexpr std::size_t c_maxlines = 25;
|
||||||
|
|
||||||
while (
|
while (replxx_getline(interactive, 0 /*parser_stack_size*/, rx, &input_cstr))
|
||||||
//repl_getline(interactive, cin, cout, input_str) // once upon a time
|
|
||||||
replxx_getline(interactive, 0 /*parser_stack_size*/, rx, input_str))
|
|
||||||
{
|
{
|
||||||
span_type input = span_type::from_string(input_str);
|
|
||||||
|
|
||||||
//cout << "input: " << input << endl;
|
//cout << "input: " << input << endl;
|
||||||
|
|
||||||
// reminder: input may contain multiple tokens
|
// reminder: input may contain multiple tokens
|
||||||
while (!input.empty()) {
|
if (input_cstr && *input_cstr) {
|
||||||
auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
|
auto [error, input] = tkz.buffer_input_line(input_cstr, false /*!eof*/);
|
||||||
|
|
||||||
if (tk.is_valid()) {
|
{
|
||||||
cout << tk << endl;
|
auto [tk, consumed, error] = tkz.scan(input);
|
||||||
} else if (error.is_error()) {
|
|
||||||
cout << "tokenizer error: " << endl;
|
|
||||||
error.report(cout);
|
|
||||||
|
|
||||||
break;
|
if (tk.is_valid()) {
|
||||||
|
cout << tk << endl;
|
||||||
|
} else if (error.is_error()) {
|
||||||
|
cout << "tokenizer error: " << endl;
|
||||||
|
error.report(cout);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
input = input.after_prefix(consumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
input = input.after_prefix(consumed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* here: input.empty() or error encountered */
|
/* here: input.empty() or error encountered */
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ namespace xo {
|
||||||
using CharT = char;
|
using CharT = char;
|
||||||
|
|
||||||
/** type representing a contiguous span of tokenizer input characters **/
|
/** type representing a contiguous span of tokenizer input characters **/
|
||||||
using span_type = span<const CharT>;
|
using span_type = xo::mm::span<const CharT>;
|
||||||
|
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
|
|
@ -76,7 +76,7 @@ namespace xo {
|
||||||
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
|
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
|
||||||
* Introduced for unit tests, not used in tokenizer.
|
* Introduced for unit tests, not used in tokenizer.
|
||||||
**/
|
**/
|
||||||
explicit TkInputState(const span<const CharT>& current_line,
|
explicit TkInputState(const span_type & current_line,
|
||||||
size_t current_pos,
|
size_t current_pos,
|
||||||
size_t whitespace) : current_line_{current_line},
|
size_t whitespace) : current_line_{current_line},
|
||||||
current_pos_{current_pos},
|
current_pos_{current_pos},
|
||||||
|
|
@ -191,7 +191,7 @@ namespace xo {
|
||||||
///@{
|
///@{
|
||||||
|
|
||||||
/** remember current input line. Used only to report errors **/
|
/** remember current input line. Used only to report errors **/
|
||||||
span<const CharT> current_line_ = span<const CharT>();
|
span_type current_line_ = span_type();
|
||||||
/** start of last token within @ref current_line_ **/
|
/** start of last token within @ref current_line_ **/
|
||||||
size_t tk_start_ = 0;
|
size_t tk_start_ = 0;
|
||||||
/** input position within @ref current_line_ **/
|
/** input position within @ref current_line_ **/
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,9 @@
|
||||||
#include "TkInputState.hpp"
|
#include "TkInputState.hpp"
|
||||||
#include "span.hpp"
|
#include "span.hpp"
|
||||||
#include "scan_result.hpp"
|
#include "scan_result.hpp"
|
||||||
#include "xo/indentlog/scope.hpp"
|
#include <xo/arena/DCircularBuffer.hpp>
|
||||||
#include "xo/indentlog/print/ppdetail_atomic.hpp"
|
#include <xo/indentlog/scope.hpp>
|
||||||
|
#include <xo/indentlog/print/ppdetail_atomic.hpp>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
|
|
@ -58,15 +59,24 @@ namespace xo {
|
||||||
using CharT = char;
|
using CharT = char;
|
||||||
using token_type = Token;
|
using token_type = Token;
|
||||||
using error_type = TokenizerError;
|
using error_type = TokenizerError;
|
||||||
using span_type = span<const CharT>;
|
using DCircularBuffer = xo::mm::DCircularBuffer;
|
||||||
using input_state_type = TkInputState;
|
using CircularBufferConfig = xo::mm::CircularBufferConfig;
|
||||||
|
using span_type = xo::mm::span<const CharT>;
|
||||||
|
//using input_state_type = TkInputState;
|
||||||
using result_type = scan_result;
|
using result_type = scan_result;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** @defgroup tokenizer-ctors tokenizer constructors **/
|
/** @defgroup tokenizer-ctors tokenizer constructors **/
|
||||||
///@{
|
///@{
|
||||||
|
|
||||||
Tokenizer(bool debug_flag = false);
|
/**
|
||||||
|
* @p config gives configuration for circular input buffer
|
||||||
|
* @p debug_flag enables tokenizer debug output
|
||||||
|
**/
|
||||||
|
Tokenizer(const CircularBufferConfig & config = CircularBufferConfig{.name_ = "tkz-input",
|
||||||
|
.max_capacity_ = 4*1024,
|
||||||
|
.max_captured_span_ = 128},
|
||||||
|
bool debug_flag = false);
|
||||||
|
|
||||||
///@}
|
///@}
|
||||||
|
|
||||||
|
|
@ -119,6 +129,11 @@ namespace xo {
|
||||||
**/
|
**/
|
||||||
bool has_prefix() const { return !prefix_.empty(); }
|
bool has_prefix() const { return !prefix_.empty(); }
|
||||||
|
|
||||||
|
/** buffer contents of input_cstr.
|
||||||
|
* May throw if buffer space exhausted
|
||||||
|
**/
|
||||||
|
std::pair<input_error, span_type> buffer_input_line(const char * input_cstr, bool eof_flag);
|
||||||
|
|
||||||
/** scan for next input token, given @p input.
|
/** scan for next input token, given @p input.
|
||||||
* Note:
|
* Note:
|
||||||
* - tokenizer can consume input (e.g. whitespace)
|
* - tokenizer can consume input (e.g. whitespace)
|
||||||
|
|
@ -130,8 +145,7 @@ namespace xo {
|
||||||
*
|
*
|
||||||
* @return {parsed token, consumed span}
|
* @return {parsed token, consumed span}
|
||||||
**/
|
**/
|
||||||
scan_result scan(const span_type & input,
|
scan_result scan(const span_type & input);
|
||||||
bool eof_flag);
|
|
||||||
|
|
||||||
/** discard current line after error. Just cleans up error-reporting state **/
|
/** discard current line after error. Just cleans up error-reporting state **/
|
||||||
void discard_current_line();
|
void discard_current_line();
|
||||||
|
|
@ -142,6 +156,8 @@ namespace xo {
|
||||||
/** @defgroup tokenizer-instance-vars tokenizer instance variables **/
|
/** @defgroup tokenizer-instance-vars tokenizer instance variables **/
|
||||||
///@{
|
///@{
|
||||||
|
|
||||||
|
/** Buffer input here. vm-aware. uses mmap directly **/
|
||||||
|
DCircularBuffer input_buffer_;
|
||||||
/** track input state (line#,pos,..) for error messages.
|
/** track input state (line#,pos,..) for error messages.
|
||||||
* There's an ordering problem here:
|
* There's an ordering problem here:
|
||||||
* 1. input_state_.skip_leading_whitespace() advances
|
* 1. input_state_.skip_leading_whitespace() advances
|
||||||
|
|
@ -150,7 +166,7 @@ namespace xo {
|
||||||
* 3. but need newline to end token
|
* 3. but need newline to end token
|
||||||
* Also recall input_state_type needed for reporting errors.
|
* Also recall input_state_type needed for reporting errors.
|
||||||
**/
|
**/
|
||||||
input_state_type input_state_;
|
TkInputState input_state_;
|
||||||
/** Accumulate partial token here.
|
/** Accumulate partial token here.
|
||||||
* This will happen if input sent to @ref tokenizer::scan
|
* This will happen if input sent to @ref tokenizer::scan
|
||||||
* ends without whitespace such that last available token's
|
* ends without whitespace such that last available token's
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ namespace xo {
|
||||||
class TokenizerError {
|
class TokenizerError {
|
||||||
public:
|
public:
|
||||||
using CharT = char;
|
using CharT = char;
|
||||||
using span_type = span<const CharT>;
|
using span_type = xo::mm::span<const CharT>;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** @defgroup tokenizer-error-ctors **/
|
/** @defgroup tokenizer-error-ctors **/
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ namespace xo {
|
||||||
public:
|
public:
|
||||||
using CharT = char;
|
using CharT = char;
|
||||||
using token_type = Token;
|
using token_type = Token;
|
||||||
using span_type = span<const CharT>;
|
using span_type = xo::mm::span<const CharT>;
|
||||||
using error_type = TokenizerError;
|
using error_type = TokenizerError;
|
||||||
using input_state_type = TkInputState;
|
using input_state_type = TkInputState;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,291 +0,0 @@
|
||||||
/** @file span.hpp **/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "xo/indentlog/scope.hpp"
|
|
||||||
#include "xo/indentlog/print/ppdetail_atomic.hpp"
|
|
||||||
#include <ostream>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
namespace xo {
|
|
||||||
namespace scm {
|
|
||||||
/** @class span compression/span.hpp
|
|
||||||
*
|
|
||||||
* @brief A contiguous range of characters, without ownership.
|
|
||||||
*
|
|
||||||
* @tparam CharT type for elements referred to by this span.
|
|
||||||
**/
|
|
||||||
template <typename CharT>
|
|
||||||
class span {
|
|
||||||
public:
|
|
||||||
/** @defgroup span-type-traits span type traits **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
/** typealias for span size (in units of CharT) **/
|
|
||||||
using size_type = std::uint64_t;
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
public:
|
|
||||||
/** @defgroup span-ctors span constructors **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
/** null span **/
|
|
||||||
span() : lo_{nullptr}, hi_{nullptr} {}
|
|
||||||
|
|
||||||
/** Create span for the contiguous memory range [@p lo, @p hi) **/
|
|
||||||
span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}
|
|
||||||
|
|
||||||
/** explicit conversion from span<U> **/
|
|
||||||
template<typename CharU>
|
|
||||||
span(const span<CharU> & other,
|
|
||||||
std::enable_if_t<std::is_convertible_v<CharU*, CharT*>
|
|
||||||
&& !std::is_same_v<CharU, CharT>> * = nullptr)
|
|
||||||
: lo_{other.lo()}, hi_{other.hi()} {}
|
|
||||||
|
|
||||||
/** copy ctor (explicit to avoid ambiguity with template ctor) **/
|
|
||||||
span(const span & other) = default;
|
|
||||||
span & operator=(const span & other) = default;
|
|
||||||
|
|
||||||
/** Create a null span (i.e. with null @p lo, @p hi pointers)
|
|
||||||
* A null span can be concatenated with any other span
|
|
||||||
* without triggering matching-endpoint asserts.
|
|
||||||
**/
|
|
||||||
static span make_null() { return span(static_cast<CharT*>(nullptr), static_cast<CharT*>(nullptr)); }
|
|
||||||
|
|
||||||
/** @brief create span for C-style string @p cstr **/
|
|
||||||
static span from_cstr(const CharT * cstr) {
|
|
||||||
CharT * lo = cstr;
|
|
||||||
CharT * hi = cstr ? cstr + strlen(cstr) : nullptr;
|
|
||||||
|
|
||||||
return span(lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief create span from std::string @p str **/
|
|
||||||
static span from_string(const std::string& str) {
|
|
||||||
CharT * lo = &(*str.begin());
|
|
||||||
CharT * hi = &(*str.end());
|
|
||||||
|
|
||||||
return span(lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief concatenate two contiguous spans */
|
|
||||||
static span concat(const span & span1, const span & span2) {
|
|
||||||
if (span1.is_null())
|
|
||||||
return span2;
|
|
||||||
if (span2.is_null())
|
|
||||||
return span1;
|
|
||||||
|
|
||||||
if (span1.hi() != span2.lo()) {
|
|
||||||
scope log(XO_DEBUG(true));
|
|
||||||
|
|
||||||
log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(span1.hi() == span2.lo());
|
|
||||||
|
|
||||||
CharT * lo = span1.lo();
|
|
||||||
CharT * hi = span2.hi();
|
|
||||||
|
|
||||||
return span(lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
/** @defgroup span-access-methods **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
CharT * lo() const { return lo_; } /* get member span::lo_ */
|
|
||||||
CharT * hi() const { return hi_; } /* get member span::hi_ */
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
/** @defgroup span-general-methods **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
/** @brief strip prefix until first occurrence of '\n', including the newline **/
|
|
||||||
void discard_until_newline() {
|
|
||||||
for (const CharT * p = lo_; p < hi_; ++p) {
|
|
||||||
if (*p == '\n') {
|
|
||||||
lo_ = p + 1;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lo_ = hi_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Create new span over supplied type,
|
|
||||||
* with identical (possibly misaligned) endpoints.
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* 1. New span uses exactly the same memory addresses.
|
|
||||||
* Endpoint pointers may not be aligned.
|
|
||||||
* 2. Implementation assumes code compiled with
|
|
||||||
* @code -fno-strict-aliasing @endcode enabled.
|
|
||||||
*
|
|
||||||
* @tparam OtherT element type for new span
|
|
||||||
**/
|
|
||||||
template <typename OtherT>
|
|
||||||
span<OtherT>
|
|
||||||
cast() const { return span<OtherT>(reinterpret_cast<OtherT *>(lo_),
|
|
||||||
reinterpret_cast<OtherT *>(hi_)); }
|
|
||||||
|
|
||||||
/** @brief create span including the first @p z members of this span. **/
|
|
||||||
span prefix(size_type z) const { return span(lo_, lo_ + z); }
|
|
||||||
|
|
||||||
/** @brief create span representing prefix up to (but not including) @p *p
|
|
||||||
**/
|
|
||||||
span prefix_upto(CharT * p) const {
|
|
||||||
if (p <= hi_)
|
|
||||||
return span(lo_, p);
|
|
||||||
else
|
|
||||||
return span(lo_, hi_);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief create span with first @p z members of this span removed **/
|
|
||||||
span after_prefix(size_type z) const {
|
|
||||||
if (lo_ + z > hi_)
|
|
||||||
z = hi_ - lo_;
|
|
||||||
|
|
||||||
return span(lo_ + z, hi_);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief create span with @p prefix of this span removed **/
|
|
||||||
span after_prefix(const span & prefix) const {
|
|
||||||
if (!prefix.is_null() && (prefix.lo() != lo_)) {
|
|
||||||
throw std::runtime_error
|
|
||||||
("after_prefix: expected prefix of this span");
|
|
||||||
}
|
|
||||||
|
|
||||||
return after_prefix(prefix.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Create span starting with position @p p.
|
|
||||||
* Does boundary checking; will return empty span if @p p is outside @c [lo_,hi)
|
|
||||||
**/
|
|
||||||
span suffix_from(CharT * p) const {
|
|
||||||
if ((lo_ <= p) && (p <= hi_))
|
|
||||||
return span(p, hi_);
|
|
||||||
else
|
|
||||||
return span(hi_, hi_);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** true iff this span is null. distinct from empty. **/
|
|
||||||
bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
|
|
||||||
/** true iff this span is empty (comprises 0 elements). **/
|
|
||||||
bool empty() const { return lo_ == hi_; }
|
|
||||||
/** report the number of elements (of type CharT) in this span. **/
|
|
||||||
size_type size() const { return hi_ - lo_; }
|
|
||||||
|
|
||||||
/** increase extent of this span to include @p x.
|
|
||||||
* Requires @c hi() == @c x.lo()
|
|
||||||
**/
|
|
||||||
span & operator+=(const span & x) {
|
|
||||||
if (hi_ == x.lo_) {
|
|
||||||
hi_ = x.hi_;
|
|
||||||
} else if (!x.is_null()) {
|
|
||||||
assert(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** print representation for this span on stream @p os **/
|
|
||||||
void print(std::ostream & os) const {
|
|
||||||
os << "<span"
|
|
||||||
<< xtag("addr", (void*)lo_)
|
|
||||||
<< xtag("size", size())
|
|
||||||
<< " :text " << xo::print::quot(std::string_view(lo_, hi_))
|
|
||||||
<< ">";
|
|
||||||
}
|
|
||||||
///@}
|
|
||||||
|
|
||||||
private:
|
|
||||||
/** @defgroup span-instance-vars **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
/** start of span.
|
|
||||||
Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
|
|
||||||
**/
|
|
||||||
CharT * lo_ = nullptr;
|
|
||||||
|
|
||||||
/** @brief end of span.
|
|
||||||
Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
|
|
||||||
**/
|
|
||||||
CharT * hi_ = nullptr;
|
|
||||||
|
|
||||||
///@}
|
|
||||||
}; /*span*/
|
|
||||||
|
|
||||||
/** @defgroup span-operators **/
|
|
||||||
///@{
|
|
||||||
|
|
||||||
/** compare spans for equality.
|
|
||||||
* Two spans are equal iff both endpoints match exactly.
|
|
||||||
**/
|
|
||||||
template <typename CharT>
|
|
||||||
inline bool
|
|
||||||
operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
|
|
||||||
return ((lhs.lo() == rhs.lo())
|
|
||||||
&& (lhs.hi() == rhs.hi()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** compare spans for inequality.
|
|
||||||
* Two spans are unequal if either paired endpoint differs.
|
|
||||||
**/
|
|
||||||
template <typename CharT>
|
|
||||||
inline bool
|
|
||||||
operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
|
|
||||||
return ((lhs.lo() != rhs.lo())
|
|
||||||
|| (lhs.hi() != rhs.hi()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** print a summary of @p x on stream @p os. Intended for diagnostics **/
|
|
||||||
template <typename CharT>
|
|
||||||
inline std::ostream &
|
|
||||||
operator<<(std::ostream & os,
|
|
||||||
const span<CharT> & x) {
|
|
||||||
x.print(os);
|
|
||||||
return os;
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
} /*namespace scm*/
|
|
||||||
|
|
||||||
namespace print {
|
|
||||||
template <typename CharT>
|
|
||||||
class printspan_impl {
|
|
||||||
public:
|
|
||||||
printspan_impl(xo::scm::span<CharT> x) : span_{x} {}
|
|
||||||
|
|
||||||
xo::scm::span<CharT> span_;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename CharT>
|
|
||||||
printspan_impl<CharT> printspan(const xo::scm::span<CharT>& span) {
|
|
||||||
return printspan_impl<CharT>(span);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename CharT>
|
|
||||||
inline std::ostream &
|
|
||||||
operator<< (std::ostream & os,
|
|
||||||
const printspan_impl<CharT> & x)
|
|
||||||
{
|
|
||||||
for (const CharT * p = x.span_.lo(); p < x.span_.hi(); ++p)
|
|
||||||
os << *p;
|
|
||||||
|
|
||||||
return os;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef ppdetail_atomic
|
|
||||||
template <typename CharT> \
|
|
||||||
PPDETAIL_ATOMIC_BODY(printspan_impl<CharT>);
|
|
||||||
|
|
||||||
template <typename CharT> \
|
|
||||||
PPDETAIL_ATOMIC_BODY(xo::scm::span<CharT>);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
||||||
} /*namespace xo*/
|
|
||||||
|
|
@ -10,6 +10,8 @@ set(SELF_SRCS
|
||||||
tokentype.cpp)
|
tokentype.cpp)
|
||||||
|
|
||||||
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
|
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
|
||||||
|
# deps must coordinate with xo-tokenizer/cmake/xo_tokenizer2Config.cmake.in
|
||||||
|
xo_dependency(${SELF_LIB} xo_arena)
|
||||||
xo_dependency(${SELF_LIB} indentlog)
|
xo_dependency(${SELF_LIB} indentlog)
|
||||||
|
|
||||||
# end CMakeLists.txt
|
# end CMakeLists.txt
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,8 @@ namespace xo {
|
||||||
// for example including leading whitespace.
|
// for example including leading whitespace.
|
||||||
// See discussion in tokenizer scan() method
|
// See discussion in tokenizer scan() method
|
||||||
|
|
||||||
scope log(XO_DEBUG(debug_flag_));
|
scope log(XO_DEBUG(debug_flag_),
|
||||||
|
xtag("input", input));
|
||||||
|
|
||||||
/* look ahead to {end of line, end of input}, whichever comes first */
|
/* look ahead to {end of line, end of input}, whichever comes first */
|
||||||
const CharT * sol = input.lo();
|
const CharT * sol = input.lo();
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,13 @@
|
||||||
#include "Tokenizer.hpp"
|
#include "Tokenizer.hpp"
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
|
using std::byte;
|
||||||
|
|
||||||
namespace scm {
|
namespace scm {
|
||||||
Tokenizer::Tokenizer(bool debug_flag)
|
Tokenizer::Tokenizer(const CircularBufferConfig & config,
|
||||||
: input_state_{debug_flag}
|
bool debug_flag)
|
||||||
|
: input_buffer_{DCircularBuffer::map(config)},
|
||||||
|
input_state_{debug_flag}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -108,7 +112,7 @@ namespace xo {
|
||||||
auto
|
auto
|
||||||
Tokenizer::assemble_token(std::size_t initial_whitespace,
|
Tokenizer::assemble_token(std::size_t initial_whitespace,
|
||||||
const span_type & token_text,
|
const span_type & token_text,
|
||||||
input_state_type * p_input_state) -> result_type
|
TkInputState * p_input_state) -> result_type
|
||||||
{
|
{
|
||||||
/* literal|pretty|streamlined */
|
/* literal|pretty|streamlined */
|
||||||
log_config::style = function_style::streamlined;
|
log_config::style = function_style::streamlined;
|
||||||
|
|
@ -600,7 +604,7 @@ namespace xo {
|
||||||
|
|
||||||
auto
|
auto
|
||||||
Tokenizer::assemble_final_token(const span_type & token_text,
|
Tokenizer::assemble_final_token(const span_type & token_text,
|
||||||
input_state_type * p_input_state) -> result_type
|
TkInputState * p_input_state) -> result_type
|
||||||
{
|
{
|
||||||
return assemble_token(0 /*initial_whitespace*/,
|
return assemble_token(0 /*initial_whitespace*/,
|
||||||
token_text,
|
token_text,
|
||||||
|
|
@ -608,12 +612,43 @@ namespace xo {
|
||||||
}
|
}
|
||||||
|
|
||||||
auto
|
auto
|
||||||
Tokenizer::scan(const span_type & input,
|
Tokenizer::buffer_input_line(const char * input_cstr,
|
||||||
bool eof_flag) -> result_type
|
bool eof_flag) -> std::pair<input_error, span_type>
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(input_state_.debug_flag()));
|
scope log(XO_DEBUG(input_state_.debug_flag()));
|
||||||
|
|
||||||
log && log(xtag("input", input));
|
log && log(xtag("input", input_cstr));
|
||||||
|
|
||||||
|
auto buf_input_0 = input_buffer_.input_range().hi();
|
||||||
|
|
||||||
|
auto remainder = input_buffer_.append
|
||||||
|
(DCircularBuffer::const_span_type
|
||||||
|
((const byte *)input_cstr,
|
||||||
|
(const byte *)input_cstr + strlen(input_cstr)));
|
||||||
|
|
||||||
|
const char * newline_cstr = "\n";
|
||||||
|
auto remainder2 = input_buffer_.append
|
||||||
|
(DCircularBuffer::const_span_type
|
||||||
|
((const byte *)newline_cstr,
|
||||||
|
(const byte *)newline_cstr + strlen(newline_cstr)));
|
||||||
|
|
||||||
|
if (!remainder.empty() || !remainder2.empty()) {
|
||||||
|
throw std::runtime_error(tostr("Tokenizer::buffer_line: line too long!",
|
||||||
|
xtag("remainder.size", remainder.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto buf_input_1 = input_buffer_.input_range().hi();
|
||||||
|
|
||||||
|
span_type input = span_type((const char *)buf_input_0,
|
||||||
|
(const char *)buf_input_1);
|
||||||
|
|
||||||
|
return this->input_state_.capture_current_line(input, eof_flag);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto
|
||||||
|
Tokenizer::scan(const span_type & input) -> result_type
|
||||||
|
{
|
||||||
|
scope log(XO_DEBUG(input_state_.debug_flag()));
|
||||||
|
|
||||||
/* - Always at beginning of token when scan() invoked
|
/* - Always at beginning of token when scan() invoked
|
||||||
* - scan will not report any portion of line as consumed until it has
|
* - scan will not report any portion of line as consumed until it has
|
||||||
|
|
@ -625,9 +660,6 @@ namespace xo {
|
||||||
* with the same input span multiple times
|
* with the same input span multiple times
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* automagically no-ops when the same input presented twice */
|
|
||||||
this->input_state_.capture_current_line(input, eof_flag);
|
|
||||||
|
|
||||||
const CharT * ix = this->input_state_.skip_leading_whitespace();
|
const CharT * ix = this->input_state_.skip_leading_whitespace();
|
||||||
|
|
||||||
if(ix == input.hi()) {
|
if(ix == input.hi()) {
|
||||||
|
|
@ -789,7 +821,7 @@ namespace xo {
|
||||||
* - punctuation
|
* - punctuation
|
||||||
*/
|
*/
|
||||||
for (; ix != input.hi(); ++ix) {
|
for (; ix != input.hi(); ++ix) {
|
||||||
if (input_state_type::is_whitespace(*ix)
|
if (TkInputState::is_whitespace(*ix)
|
||||||
|| is_1char_punctuation(*ix)
|
|| is_1char_punctuation(*ix)
|
||||||
|| is_2char_punctuation(*ix))
|
|| is_2char_punctuation(*ix))
|
||||||
{
|
{
|
||||||
|
|
@ -829,7 +861,7 @@ namespace xo {
|
||||||
return assemble_token(whitespace_z,
|
return assemble_token(whitespace_z,
|
||||||
span_type(tk_start, ix) /*token*/,
|
span_type(tk_start, ix) /*token*/,
|
||||||
&(this->input_state_));
|
&(this->input_state_));
|
||||||
} /*scan*/
|
} /*scan*/
|
||||||
} /*namespace scm*/
|
} /*namespace scm*/
|
||||||
} /*namespace xo*/
|
} /*namespace xo*/
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue