From 610afe76779f107c7cd7ebc746549c798433371d Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Fri, 23 Jan 2026 14:57:43 -0500 Subject: [PATCH] xo-reader2: readerreplxx works + streamline debugging --- .../example/readerreplxx/readerreplxx.cpp | 80 +++++++++++++++---- .../include/xo/reader2/ReaderConfig.hpp | 20 ++--- .../include/xo/reader2/SchematikaReader.hpp | 2 + xo-reader2/src/reader2/DDefineSsm.cpp | 2 +- xo-reader2/src/reader2/DExpectTypeSsm.cpp | 2 +- xo-reader2/src/reader2/DProgressSsm.cpp | 3 +- xo-reader2/src/reader2/SchematikaReader.cpp | 26 +++++- .../include/xo/tokenizer2/Tokenizer.hpp | 8 +- xo-tokenizer2/src/tokenizer2/Tokenizer.cpp | 51 +++++------- 9 files changed, 123 insertions(+), 71 deletions(-) diff --git a/xo-reader2/example/readerreplxx/readerreplxx.cpp b/xo-reader2/example/readerreplxx/readerreplxx.cpp index 76bd0520..28598292 100644 --- a/xo-reader2/example/readerreplxx/readerreplxx.cpp +++ b/xo-reader2/example/readerreplxx/readerreplxx.cpp @@ -1,11 +1,14 @@ /** @file readerreplxx.cpp **/ +#include #include #include #include #include -//#include +#include +#include #include +#include #include #include #include // for isatty @@ -32,7 +35,8 @@ bool replxx_getline(bool interactive, if (retval) *p_input = input_cstr; - rx.history_add(input_cstr); + if (input_cstr) + rx.history_add(input_cstr); return retval; } @@ -56,8 +60,14 @@ welcome(std::ostream & os) namespace { using xo::scm::SchematikaReader; + using xo::scm::AExpression; + using xo::print::APrintable; using xo::print::ppstate_standalone; using xo::print::ppconfig; + using xo::facet::FacetRegistry; + using xo::facet::obj; + using xo::xtag; + using xo::scope; using std::cout; using std::endl; @@ -69,15 +79,36 @@ namespace { bool reader_seq(SchematikaReader * p_reader, SchematikaReader::span_type * p_input, - bool eof) + bool eof, + bool debug_flag) { + scope log(XO_DEBUG(debug_flag)); + + if (!p_input || p_input->empty()) + return true; + auto [expr, remaining, error] = p_reader->read_expr(*p_input, eof); + obj expr_pr; + + if (expr) { + expr_pr = FacetRegistry::instance().variant(expr); + assert(expr_pr); + } + + if (log) { + if (expr_pr) { + log(xtag("expr", expr_pr)); + } + log(xtag("remaining", remaining)); + log(xtag("error", error)); + } + if (expr) { ppconfig ppc; ppstate_standalone pps(&cout, 0, &ppc); - pps.prettyn(expr); + pps.prettyn(expr_pr); *p_input = remaining; @@ -93,6 +124,8 @@ namespace { return false; } else { + *p_input = remaining; + /* partial expression or whitespace input, no error */ return true; } @@ -110,15 +143,19 @@ main() using xo::mm::DX1Collector; using xo::mm::CollectorConfig; using xo::mm::DArena; - //using xo::print::ppconfig; - //using xo::print::ppstate_standalone; using xo::facet::with_facet; using xo::facet::obj; + using xo::S_reader2_tag; + using xo::InitSubsys; + using xo::Subsystem; using xo::scope; using namespace std; bool interactive = isatty(STDIN_FILENO); + InitSubsys::require(); + Subsystem::initialize_all(); + Replxx rx; rx.set_max_history_size(1000); rx.history_load("repl_history.txt"); @@ -134,7 +171,12 @@ main() obj expr_alloc = with_facet::mkobj(&x1); // accepting defaults too - ReaderConfig rdr_config = ReaderConfig(); + ReaderConfig rdr_config; + { + //rdr_config.reader_debug_flag_ = true; + //rdr_config.parser_debug_flag_ = true; + //rdr_config.tk_debug_flag_ = true; + } SchematikaReader rdr(rdr_config, expr_alloc); using span_type = SchematikaReader::span_type; @@ -144,24 +186,28 @@ main() rdr.begin_interactive_session(); bool eof = false; - const char * input_str; + const char * input_str = nullptr; span_type input; while (replxx_getline(interactive, rdr.is_at_toplevel(), rx, &input_str)) { - input = span_type::from_cstr(input_str); + if (input_str && *input_str) { + input = span_type::from_cstr(input_str); - while (!input.empty() && reader_seq(&rdr, &input, false /*eof*/)) { - ; + while (!input.empty() + && reader_seq(&rdr, &input, false /*eof*/, c_debug_flag)) + { + ; + } + + /* here: either: + * 1. input.empty() or + * 2. error encountered + */ } - - /* here: either: - * 1. input.empty() or - * 2. error encountered - */ } /* reminder: eof can complete at most one token */ - reader_seq(&rdr, &input, true /*eof*/); + reader_seq(&rdr, &input, true /*eof*/, c_debug_flag); rx.history_save("repl_history.txt"); } diff --git a/xo-reader2/include/xo/reader2/ReaderConfig.hpp b/xo-reader2/include/xo/reader2/ReaderConfig.hpp index e7f0d7a6..826bb215 100644 --- a/xo-reader2/include/xo/reader2/ReaderConfig.hpp +++ b/xo-reader2/include/xo/reader2/ReaderConfig.hpp @@ -11,7 +11,7 @@ namespace xo { namespace scm { - /** @brief Configuration for SchemtikaReader + /** @brief Configuration for SchematikaReader **/ struct ReaderConfig { using CircularBufferConfig = xo::mm::CircularBufferConfig; @@ -26,6 +26,7 @@ namespace xo { .max_captured_span_ = 128 }; /** debug flag for schematika tokenizer **/ bool tk_debug_flag_ = false; + /** arena configuration for parser stack **/ ArenaConfig parser_arena_config_ { .name_ = "parer-arena", .size_ = 2*1024*1024, @@ -33,19 +34,14 @@ namespace xo { .store_header_flag_ = false, .header_{}, .debug_flag_ = false }; - /** max size (in bytes) of stringtable **/ - size_t max_stringtable_cap_ = 64*1024; /** debug flag for schematika parser **/ bool parser_debug_flag_ = false; -#ifdef NOT_YET - /** arena configuration for output expressions **/ - ArenaConfig expr_arena_config_ { .name_ = "expr-arena", - .size_ = 2*1024*1024, - .hugepage_z_ = 2*1024*1024, - .store_header_flag_ = false, - .header_{}, - .debug_flag_ = false }; -#endif + + /** max size (in bytes) of stringtable **/ + size_t max_stringtable_cap_ = 64*1024; + + /** debug flag for schematika_reader **/ + bool reader_debug_flag_ = false;; }; } /*namespace scm*/ diff --git a/xo-reader2/include/xo/reader2/SchematikaReader.hpp b/xo-reader2/include/xo/reader2/SchematikaReader.hpp index bad6c053..540c19bc 100644 --- a/xo-reader2/include/xo/reader2/SchematikaReader.hpp +++ b/xo-reader2/include/xo/reader2/SchematikaReader.hpp @@ -77,6 +77,8 @@ namespace xo { /** current output from reader **/ ReaderResult result_; + /** true to enable reader debug logging **/ + bool debug_flag_ = false; }; } /*namespace scm*/ } /*namespace xo*/ diff --git a/xo-reader2/src/reader2/DDefineSsm.cpp b/xo-reader2/src/reader2/DDefineSsm.cpp index 21ff70f0..d110df35 100644 --- a/xo-reader2/src/reader2/DDefineSsm.cpp +++ b/xo-reader2/src/reader2/DDefineSsm.cpp @@ -550,7 +550,7 @@ namespace xo { DDefineSsm::on_singleassign_token(const Token & tk, ParserStateMachine * p_psm) { - scope log(XO_DEBUG(true), xtag("defstate", defstate_)); + scope log(XO_DEBUG(p_psm->debug_flag()), xtag("defstate", defstate_)); if ((defstate_ == defexprstatetype::def_2) || (defstate_ == defexprstatetype::def_4)) diff --git a/xo-reader2/src/reader2/DExpectTypeSsm.cpp b/xo-reader2/src/reader2/DExpectTypeSsm.cpp index 30520811..e527dffa 100644 --- a/xo-reader2/src/reader2/DExpectTypeSsm.cpp +++ b/xo-reader2/src/reader2/DExpectTypeSsm.cpp @@ -114,7 +114,7 @@ namespace xo { DExpectTypeSsm::on_symbol_token(const Token & tk, ParserStateMachine * p_psm) { - scope log(XO_DEBUG(true)); + scope log(XO_DEBUG(p_psm->debug_flag())); TypeDescr td = nullptr; diff --git a/xo-reader2/src/reader2/DProgressSsm.cpp b/xo-reader2/src/reader2/DProgressSsm.cpp index ec19e994..5a9a81ec 100644 --- a/xo-reader2/src/reader2/DProgressSsm.cpp +++ b/xo-reader2/src/reader2/DProgressSsm.cpp @@ -216,8 +216,7 @@ namespace xo { DProgressSsm::on_semicolon_token(const Token & tk, ParserStateMachine * p_psm) { - constexpr bool c_debug_flag = true; - scope log(XO_DEBUG(c_debug_flag)); + scope log(XO_DEBUG(p_psm->debug_flag())); /* note: implementation should parallel .on_rightparen_token() */ diff --git a/xo-reader2/src/reader2/SchematikaReader.cpp b/xo-reader2/src/reader2/SchematikaReader.cpp index bfd14575..8aa85bf8 100644 --- a/xo-reader2/src/reader2/SchematikaReader.cpp +++ b/xo-reader2/src/reader2/SchematikaReader.cpp @@ -9,11 +9,13 @@ namespace xo { namespace scm { SchematikaReader::SchematikaReader(const ReaderConfig & config, obj expr_alloc) - : tokenizer_{config.tk_buffer_config_, config.tk_debug_flag_}, + : tokenizer_{config.tk_buffer_config_, + config.tk_debug_flag_}, parser_{config.parser_arena_config_, config.max_stringtable_cap_, expr_alloc, - config.parser_debug_flag_} + config.parser_debug_flag_}, + debug_flag_{config.reader_debug_flag_} { } @@ -35,18 +37,34 @@ namespace xo { const ReaderResult & SchematikaReader::read_expr(span_type input_ext, bool eof) { + scope log(XO_DEBUG(debug_flag_)); + + if (log) { + log(xtag("input_ext", input_ext)); + log(xtag("eof", eof)); + } + if (!input_ext.empty()) { auto [error, input] = tokenizer_.buffer_input_line(input_ext, eof); - // log && log(xtag("msg", "buffered input line")); - // log && log(xtag("input", input)); + if (log) { + log(xtag("msg", "before loop: buffered input line")); + log(xtag("input", input)); + } while (!input.empty()) { + log && log(xtag("msg", "loop"), + xtag("input", input)); + auto [tk, consumed, error] = tokenizer_.scan(input); + log && log(xtag("tk", tk), xtag("consumed", consumed)); + auto rem_input = input.after_prefix(consumed); + log && log(xtag("rem_input", rem_input)); + if (!tk.is_valid() && error.is_error()) { this->result_ = ReaderResult diff --git a/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp b/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp index 69843a5a..3dc6da11 100644 --- a/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp +++ b/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp @@ -109,19 +109,19 @@ namespace xo { static bool is_2char_punctuation(CharT ch); /** assemble token from text @p token_text. - * @p initial_whitespace Amount of whitespace input being consumed from input. + * @p ws_span whitespace preceding token * @p token_text subset of input_line representing a single token. * @p p_input_state input state containing input_line. On exit current line cleared * if error * * retval.consumed will represent some possibly-empty prefix of @p input **/ - static scan_result assemble_token(std::size_t initial_whitespace, - const span_type & token_text, + static scan_result assemble_token( span_type ws_span, + span_type token_text, TkInputState * p_input_state); /** degenerate version of assemble_token() on reaching end-of-file **/ - static scan_result assemble_final_token(const span_type & token_text, + static scan_result assemble_final_token(span_type token_text, TkInputState * p_input_state); /** true if tokenizer contains stored prefix of diff --git a/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp b/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp index 2784072a..c79e10c3 100644 --- a/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp +++ b/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp @@ -110,8 +110,8 @@ namespace xo { } auto - Tokenizer::assemble_token(std::size_t initial_whitespace, - const span_type & token_text, + Tokenizer::assemble_token(span_type ws_span, + span_type token_text, TkInputState * p_input_state) -> result_type { /* literal|pretty|streamlined */ @@ -119,7 +119,7 @@ namespace xo { scope log(XO_DEBUG(p_input_state->debug_flag())); log && log(xtag("token_text", token_text), - xtag("initial_whitespace", initial_whitespace), + xtag("initial_whitespace", ws_span.size()), xtag("input_state", *p_input_state)); tokentype tk_type = tokentype::tk_invalid; @@ -598,18 +598,16 @@ namespace xo { // TOOD: report tk_text as span, // but must pin / unpin - /* input.prefix(0): - * require caller preserves current input line until it's entirely exhausted - */ return result_type(Token(tk_type, std::move(tk_text)), - p_input_state->current_line().prefix(0)); + span_type::concat(ws_span, + span_type(tk_start, tk_end))); } /*assemble_token*/ auto - Tokenizer::assemble_final_token(const span_type & token_text, + Tokenizer::assemble_final_token(span_type token_text, TkInputState * p_input_state) -> result_type { - return assemble_token(0 /*initial_whitespace*/, + return assemble_token(token_text.prefix(0) /*ws_span*/, token_text, p_input_state); } @@ -645,6 +643,7 @@ namespace xo { Tokenizer::scan(const span_type & input) -> result_type { scope log(XO_DEBUG(input_state_.debug_flag())); + log && log(xtag("input", input)); /* - Always at beginning of token when scan() invoked * - scan will not report any portion of line as consumed until it has @@ -659,12 +658,14 @@ namespace xo { const CharT * ix = this->input_state_.skip_leading_whitespace(); if(ix == input.hi()) { - log && log("end input -> consume current line"); + log && log("end buffered input -> consume current line"); /* entirety of current line has been tokenized * -> caller may consume it */ - return result_type::make_whitespace(this->input_state_.consume_current_line()); + this->input_state_.consume_current_line(); + + return result_type::make_whitespace(input); } /* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */ @@ -697,27 +698,17 @@ namespace xo { ++ix; -#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit - if (ix == input.hi()) { - /* need more input to know if/when token complete */ - this->prefix_ += std::string(tk_start, input.hi()); + CharT ch2 = *ix; - log && log(xtag("captured-prefix1", this->prefix_)); - } else -#endif - { - CharT ch2 = *ix; - - if (((ch2 >= '0') && (ch2 <= '9')) - || ((ch2 >= 'A') && (ch2 <= 'Z')) - || ((ch2 >= 'a') && (ch2 <= 'z'))) + if (((ch2 >= '0') && (ch2 <= '9')) + || ((ch2 >= 'A') && (ch2 <= 'Z')) + || ((ch2 >= 'a') && (ch2 <= 'z'))) { /* treat as 1 char punctuation */ ; } else { - /* include next char */ - ++ix; - } + /* include next char */ + ++ix; } } else if (*ix == '"') { bool complete_flag = false; @@ -779,7 +770,7 @@ namespace xo { this->input_state_.advance_until(ix); - return assemble_token(whitespace_z, + return assemble_token(span_type(input.lo(), tk_start), span_type(tk_start, ix) /*token*/, &(this->input_state_)); } @@ -803,7 +794,7 @@ namespace xo { this->input_state_.advance_until(ix); /* ignore next char and complete token */ - return assemble_token(whitespace_z, + return assemble_token(span_type(input.lo(), tk_start), span_type(tk_start, ix) /*token*/, &(this->input_state_)); } @@ -854,7 +845,7 @@ namespace xo { this->input_state_.advance_until(ix); - return assemble_token(whitespace_z, + return assemble_token(span_type(input.lo(), tk_start), span_type(tk_start, ix) /*token*/, &(this->input_state_)); } /*_scan_aux*/