xo-reader2: readerreplxx works + streamline debugging

This commit is contained in:
Roland Conybeare 2026-01-23 14:57:43 -05:00
commit 610afe7677
9 changed files with 124 additions and 72 deletions

View file

@ -1,11 +1,14 @@
/** @file readerreplxx.cpp **/
#include <xo/reader2/init_reader2.hpp>
#include <xo/reader2/SchematikaReader.hpp>
#include <xo/gc/DX1Collector.hpp>
#include <xo/gc/detail/IAllocator_DX1Collector.hpp>
#include <xo/alloc2/Allocator.hpp>
//#include <xo/facet/facet.hpp>
#include <xo/printable2/Printable.hpp>
#include <xo/facet/FacetRegistry.hpp>
#include <xo/facet/obj.hpp>
#include <xo/subsys/Subsystem.hpp>
#include <replxx.hxx>
#include <iostream>
#include <unistd.h> // for isatty
@ -32,7 +35,8 @@ bool replxx_getline(bool interactive,
if (retval)
*p_input = input_cstr;
rx.history_add(input_cstr);
if (input_cstr)
rx.history_add(input_cstr);
return retval;
}
@ -56,8 +60,14 @@ welcome(std::ostream & os)
namespace {
using xo::scm::SchematikaReader;
using xo::scm::AExpression;
using xo::print::APrintable;
using xo::print::ppstate_standalone;
using xo::print::ppconfig;
using xo::facet::FacetRegistry;
using xo::facet::obj;
using xo::xtag;
using xo::scope;
using std::cout;
using std::endl;
@ -69,15 +79,36 @@ namespace {
bool
reader_seq(SchematikaReader * p_reader,
SchematikaReader::span_type * p_input,
bool eof)
bool eof,
bool debug_flag)
{
scope log(XO_DEBUG(debug_flag));
if (!p_input || p_input->empty())
return true;
auto [expr, remaining, error] = p_reader->read_expr(*p_input, eof);
obj<APrintable> expr_pr;
if (expr) {
expr_pr = FacetRegistry::instance().variant<APrintable,AExpression>(expr);
assert(expr_pr);
}
if (log) {
if (expr_pr) {
log(xtag("expr", expr_pr));
}
log(xtag("remaining", remaining));
log(xtag("error", error));
}
if (expr) {
ppconfig ppc;
ppstate_standalone pps(&cout, 0, &ppc);
pps.prettyn(expr);
pps.prettyn(expr_pr);
*p_input = remaining;
@ -93,6 +124,8 @@ namespace {
return false;
} else {
*p_input = remaining;
/* partial expression or whitespace input, no error */
return true;
}
@ -110,15 +143,19 @@ main()
using xo::mm::DX1Collector;
using xo::mm::CollectorConfig;
using xo::mm::DArena;
//using xo::print::ppconfig;
//using xo::print::ppstate_standalone;
using xo::facet::with_facet;
using xo::facet::obj;
using xo::S_reader2_tag;
using xo::InitSubsys;
using xo::Subsystem;
using xo::scope;
using namespace std;
bool interactive = isatty(STDIN_FILENO);
InitSubsys<S_reader2_tag>::require();
Subsystem::initialize_all();
Replxx rx;
rx.set_max_history_size(1000);
rx.history_load("repl_history.txt");
@ -134,7 +171,12 @@ main()
obj<AAllocator> expr_alloc = with_facet<AAllocator>::mkobj(&x1);
// accepting defaults too
ReaderConfig rdr_config = ReaderConfig();
ReaderConfig rdr_config;
{
//rdr_config.reader_debug_flag_ = true;
//rdr_config.parser_debug_flag_ = true;
//rdr_config.tk_debug_flag_ = true;
}
SchematikaReader rdr(rdr_config, expr_alloc);
using span_type = SchematikaReader::span_type;
@ -144,24 +186,28 @@ main()
rdr.begin_interactive_session();
bool eof = false;
const char * input_str;
const char * input_str = nullptr;
span_type input;
while (replxx_getline(interactive, rdr.is_at_toplevel(), rx, &input_str)) {
input = span_type::from_cstr(input_str);
if (input_str && *input_str) {
input = span_type::from_cstr(input_str);
while (!input.empty() && reader_seq(&rdr, &input, false /*eof*/)) {
;
while (!input.empty()
&& reader_seq(&rdr, &input, false /*eof*/, c_debug_flag))
{
;
}
/* here: either:
* 1. input.empty() or
* 2. error encountered
*/
}
/* here: either:
* 1. input.empty() or
* 2. error encountered
*/
}
/* reminder: eof can complete at most one token */
reader_seq(&rdr, &input, true /*eof*/);
reader_seq(&rdr, &input, true /*eof*/, c_debug_flag);
rx.history_save("repl_history.txt");
}

View file

@ -11,7 +11,7 @@
namespace xo {
namespace scm {
/** @brief Configuration for SchemtikaReader
/** @brief Configuration for SchematikaReader
**/
struct ReaderConfig {
using CircularBufferConfig = xo::mm::CircularBufferConfig;
@ -26,6 +26,7 @@ namespace xo {
.max_captured_span_ = 128 };
/** debug flag for schematika tokenizer **/
bool tk_debug_flag_ = false;
/** arena configuration for parser stack **/
ArenaConfig parser_arena_config_ { .name_ = "parer-arena",
.size_ = 2*1024*1024,
@ -33,19 +34,14 @@ namespace xo {
.store_header_flag_ = false,
.header_{},
.debug_flag_ = false };
/** max size (in bytes) of stringtable **/
size_t max_stringtable_cap_ = 64*1024;
/** debug flag for schematika parser **/
bool parser_debug_flag_ = false;
#ifdef NOT_YET
/** arena configuration for output expressions **/
ArenaConfig expr_arena_config_ { .name_ = "expr-arena",
.size_ = 2*1024*1024,
.hugepage_z_ = 2*1024*1024,
.store_header_flag_ = false,
.header_{},
.debug_flag_ = false };
#endif
/** max size (in bytes) of stringtable **/
size_t max_stringtable_cap_ = 64*1024;
/** debug flag for schematika_reader: when true, enables the reader's own
 *  debug logging (SchematikaReader copies this into its debug_flag_ member).
 *  Defaults to false.
 **/
bool reader_debug_flag_ = false;
};
} /*namespace scm*/

View file

@ -77,6 +77,8 @@ namespace xo {
/** current output from reader **/
ReaderResult result_;
/** true to enable reader debug logging **/
bool debug_flag_ = false;
};
} /*namespace scm*/
} /*namespace xo*/

View file

@ -550,7 +550,7 @@ namespace xo {
DDefineSsm::on_singleassign_token(const Token & tk,
ParserStateMachine * p_psm)
{
scope log(XO_DEBUG(true), xtag("defstate", defstate_));
scope log(XO_DEBUG(p_psm->debug_flag()), xtag("defstate", defstate_));
if ((defstate_ == defexprstatetype::def_2)
|| (defstate_ == defexprstatetype::def_4))

View file

@ -114,7 +114,7 @@ namespace xo {
DExpectTypeSsm::on_symbol_token(const Token & tk,
ParserStateMachine * p_psm)
{
scope log(XO_DEBUG(true));
scope log(XO_DEBUG(p_psm->debug_flag()));
TypeDescr td = nullptr;

View file

@ -216,8 +216,7 @@ namespace xo {
DProgressSsm::on_semicolon_token(const Token & tk,
ParserStateMachine * p_psm)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(p_psm->debug_flag()));
/* note: implementation should parallel .on_rightparen_token() */

View file

@ -9,11 +9,13 @@ namespace xo {
namespace scm {
SchematikaReader::SchematikaReader(const ReaderConfig & config,
obj<AAllocator> expr_alloc)
: tokenizer_{config.tk_buffer_config_, config.tk_debug_flag_},
: tokenizer_{config.tk_buffer_config_,
config.tk_debug_flag_},
parser_{config.parser_arena_config_,
config.max_stringtable_cap_,
expr_alloc,
config.parser_debug_flag_}
config.parser_debug_flag_},
debug_flag_{config.reader_debug_flag_}
{
}
@ -35,18 +37,34 @@ namespace xo {
const ReaderResult &
SchematikaReader::read_expr(span_type input_ext, bool eof)
{
scope log(XO_DEBUG(debug_flag_));
if (log) {
log(xtag("input_ext", input_ext));
log(xtag("eof", eof));
}
if (!input_ext.empty()) {
auto [error, input]
= tokenizer_.buffer_input_line(input_ext, eof);
// log && log(xtag("msg", "buffered input line"));
// log && log(xtag("input", input));
if (log) {
log(xtag("msg", "before loop: buffered input line"));
log(xtag("input", input));
}
while (!input.empty()) {
log && log(xtag("msg", "loop"),
xtag("input", input));
auto [tk, consumed, error] = tokenizer_.scan(input);
log && log(xtag("tk", tk), xtag("consumed", consumed));
auto rem_input = input.after_prefix(consumed);
log && log(xtag("rem_input", rem_input));
if (!tk.is_valid() && error.is_error()) {
this->result_
= ReaderResult

View file

@ -109,19 +109,19 @@ namespace xo {
static bool is_2char_punctuation(CharT ch);
/** assemble token from text @p token_text.
* @p initial_whitespace Amount of whitespace input being consumed from input.
* @p ws_span whitespace preceding token
* @p token_text subset of input_line representing a single token.
* @p p_input_state input state containing input_line. On exit current line cleared
* if error
*
* retval.consumed will represent some possibly-empty prefix of @p input
**/
static scan_result assemble_token(std::size_t initial_whitespace,
const span_type & token_text,
static scan_result assemble_token( span_type ws_span,
span_type token_text,
TkInputState * p_input_state);
/** degenerate version of assemble_token() on reaching end-of-file **/
static scan_result assemble_final_token(const span_type & token_text,
static scan_result assemble_final_token(span_type token_text,
TkInputState * p_input_state);
/** true if tokenizer contains stored prefix of

View file

@ -110,8 +110,8 @@ namespace xo {
}
auto
Tokenizer::assemble_token(std::size_t initial_whitespace,
const span_type & token_text,
Tokenizer::assemble_token(span_type ws_span,
span_type token_text,
TkInputState * p_input_state) -> result_type
{
/* literal|pretty|streamlined */
@ -119,7 +119,7 @@ namespace xo {
scope log(XO_DEBUG(p_input_state->debug_flag()));
log && log(xtag("token_text", token_text),
xtag("initial_whitespace", initial_whitespace),
xtag("initial_whitespace", ws_span.size()),
xtag("input_state", *p_input_state));
tokentype tk_type = tokentype::tk_invalid;
@ -598,18 +598,16 @@ namespace xo {
// TODO: report tk_text as span,
// but must pin / unpin
/* input.prefix(0):
* require caller preserves current input line until it's entirely exhausted
*/
return result_type(Token(tk_type, std::move(tk_text)),
p_input_state->current_line().prefix(0));
span_type::concat(ws_span,
span_type(tk_start, tk_end)));
} /*assemble_token*/
auto
Tokenizer::assemble_final_token(const span_type & token_text,
Tokenizer::assemble_final_token(span_type token_text,
TkInputState * p_input_state) -> result_type
{
return assemble_token(0 /*initial_whitespace*/,
return assemble_token(token_text.prefix(0) /*ws_span*/,
token_text,
p_input_state);
}
@ -645,6 +643,7 @@ namespace xo {
Tokenizer::scan(const span_type & input) -> result_type
{
scope log(XO_DEBUG(input_state_.debug_flag()));
log && log(xtag("input", input));
/* - Always at beginning of token when scan() invoked
* - scan will not report any portion of line as consumed until it has
@ -659,12 +658,14 @@ namespace xo {
const CharT * ix = this->input_state_.skip_leading_whitespace();
if(ix == input.hi()) {
log && log("end input -> consume current line");
log && log("end buffered input -> consume current line");
/* entirety of current line has been tokenized
* -> caller may consume it
*/
return result_type::make_whitespace(this->input_state_.consume_current_line());
this->input_state_.consume_current_line();
return result_type::make_whitespace(input);
}
/* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */
@ -697,27 +698,17 @@ namespace xo {
++ix;
#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit
if (ix == input.hi()) {
/* need more input to know if/when token complete */
this->prefix_ += std::string(tk_start, input.hi());
CharT ch2 = *ix;
log && log(xtag("captured-prefix1", this->prefix_));
} else
#endif
{
CharT ch2 = *ix;
if (((ch2 >= '0') && (ch2 <= '9'))
|| ((ch2 >= 'A') && (ch2 <= 'Z'))
|| ((ch2 >= 'a') && (ch2 <= 'z')))
if (((ch2 >= '0') && (ch2 <= '9'))
|| ((ch2 >= 'A') && (ch2 <= 'Z'))
|| ((ch2 >= 'a') && (ch2 <= 'z')))
{
/* treat as 1 char punctuation */
;
} else {
/* include next char */
++ix;
}
/* include next char */
++ix;
}
} else if (*ix == '"') {
bool complete_flag = false;
@ -779,7 +770,7 @@ namespace xo {
this->input_state_.advance_until(ix);
return assemble_token(whitespace_z,
return assemble_token(span_type(input.lo(), tk_start),
span_type(tk_start, ix) /*token*/,
&(this->input_state_));
}
@ -803,7 +794,7 @@ namespace xo {
this->input_state_.advance_until(ix);
/* ignore next char and complete token */
return assemble_token(whitespace_z,
return assemble_token(span_type(input.lo(), tk_start),
span_type(tk_start, ix) /*token*/,
&(this->input_state_));
}
@ -854,7 +845,7 @@ namespace xo {
this->input_state_.advance_until(ix);
return assemble_token(whitespace_z,
return assemble_token(span_type(input.lo(), tk_start),
span_type(tk_start, ix) /*token*/,
&(this->input_state_));
} /*_scan_aux*/