From 258d0823f59fbcbc4a2caf18554c9a1b92d93962 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Fri, 23 Jan 2026 11:54:32 -0500 Subject: [PATCH] xo-reader2: + example app 'readerreplxx' --- xo-arena/include/xo/arena/ArenaConfig.hpp | 8 + xo-gc/include/xo/gc/DX1Collector.hpp | 11 ++ xo-gc/src/gc/DX1Collector.cpp | 9 + xo-reader2/CMakeLists.txt | 5 + xo-reader2/example/CMakeLists.txt | 1 + .../example/readerreplxx/CMakeLists.txt | 14 ++ .../example/readerreplxx/readerreplxx.cpp | 159 ++++++++++-------- .../include/xo/reader2/SchematikaParser.hpp | 1 + .../include/xo/reader2/SchematikaReader.hpp | 22 ++- xo-reader2/src/reader2/CMakeLists.txt | 4 +- xo-reader2/src/reader2/SchematikaParser.cpp | 1 + xo-reader2/src/reader2/SchematikaReader.cpp | 77 ++++++--- xo-tokenizer2/example/tokenrepl/tokenrepl.cpp | 45 ++--- .../include/xo/tokenizer2/Tokenizer.hpp | 5 +- .../include/xo/tokenizer2/TokenizerError.hpp | 16 +- xo-tokenizer2/src/tokenizer2/Tokenizer.cpp | 17 +- 16 files changed, 261 insertions(+), 134 deletions(-) create mode 100644 xo-reader2/example/CMakeLists.txt create mode 100644 xo-reader2/example/readerreplxx/CMakeLists.txt diff --git a/xo-arena/include/xo/arena/ArenaConfig.hpp b/xo-arena/include/xo/arena/ArenaConfig.hpp index 4d79637b..db5a4400 100644 --- a/xo-arena/include/xo/arena/ArenaConfig.hpp +++ b/xo-arena/include/xo/arena/ArenaConfig.hpp @@ -17,6 +17,14 @@ namespace xo { * @brief configuration for a @ref DArena instance **/ struct ArenaConfig { + /** @defgroup mm-arenaconfig-ctors **/ + + ArenaConfig with_size(std::size_t z) { + ArenaConfig copy(*this); + copy.size_ = z; + return copy; + } + /** @defgroup mm-arenaconfig-instance-vars ArenaConfig members **/ ///@{ diff --git a/xo-gc/include/xo/gc/DX1Collector.hpp b/xo-gc/include/xo/gc/DX1Collector.hpp index 33c9f56d..78b55d20 100644 --- a/xo-gc/include/xo/gc/DX1Collector.hpp +++ b/xo-gc/include/xo/gc/DX1Collector.hpp @@ -69,6 +69,12 @@ namespace xo { constexpr std::uint64_t tseq_mask_shifted() const; #endif + + /** copy of this config, + * with @c arena_config_.size_ set to @p gen_z + **/ + CollectorConfig with_size(std::size_t gen_z); + generation age2gen(object_age age) const noexcept { return generation(age % n_survive_threshold_); } @@ -169,6 +175,11 @@ namespace xo { /** Create X1 collector instance. **/ explicit DX1Collector(const CollectorConfig & cfg); + /** create instance with default configuration, + * generation size @p gen_z + **/ + DX1Collector make_std(std::size_t gen_z); + std::string_view name() const { return config_.name_; } const DArena * get_object_types() const noexcept { return &object_types_; } diff --git a/xo-gc/src/gc/DX1Collector.cpp b/xo-gc/src/gc/DX1Collector.cpp index 263e5b8e..3a77e15f 100644 --- a/xo-gc/src/gc/DX1Collector.cpp +++ b/xo-gc/src/gc/DX1Collector.cpp @@ -24,6 +24,15 @@ namespace xo { using xo::facet::with_facet; namespace mm { + + CollectorConfig + CollectorConfig::with_size(std::size_t gen_z) + { + CollectorConfig copy = *this; + copy.arena_config_ = arena_config_.with_size(gen_z); + return copy; + } + #ifdef NOT_USING constexpr std::uint64_t CollectorConfig::gen_mult() const { diff --git a/xo-reader2/CMakeLists.txt b/xo-reader2/CMakeLists.txt index bc35b6a9..43c747f6 100644 --- a/xo-reader2/CMakeLists.txt +++ b/xo-reader2/CMakeLists.txt @@ -197,6 +197,11 @@ xo_add_genfacet_all(xo-reader2-genfacet-all) add_subdirectory(src/reader2) +# ---------------------------------------------------------------- +# example programs + +add_subdirectory(example) + # ---------------------------------------------------------------- # cmake helper (for external xo-reader2 users) diff --git a/xo-reader2/example/CMakeLists.txt b/xo-reader2/example/CMakeLists.txt new file mode 100644 index 00000000..fbb01ff0 --- /dev/null +++ b/xo-reader2/example/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(readerreplxx) diff --git a/xo-reader2/example/readerreplxx/CMakeLists.txt b/xo-reader2/example/readerreplxx/CMakeLists.txt new file mode 100644 index 00000000..37ecd45e --- /dev/null +++ b/xo-reader2/example/readerreplxx/CMakeLists.txt @@ -0,0 +1,14 @@ +# xo-reader2/example/readerreplxx/CMakeLists.txt + +set(SELF_EXE xo_reader2_readereplxx) +set(SELF_SRCS readerreplxx.cpp) + +if (XO_ENABLE_EXAMPLES) + xo_add_executable(${SELF_EXE} ${SELF_SRCS}) + xo_self_dependency(${SELF_EXE} xo_reader2) + xo_external_target_dependency(${SELF_EXE} replxx replxx::replxx) + + # replxx requires this + find_package(Threads REQUIRED) + target_link_libraries(${SELF_EXE} PUBLIC Threads::Threads) +endif() diff --git a/xo-reader2/example/readerreplxx/readerreplxx.cpp b/xo-reader2/example/readerreplxx/readerreplxx.cpp index e30ab047..76bd0520 100644 --- a/xo-reader2/example/readerreplxx/readerreplxx.cpp +++ b/xo-reader2/example/readerreplxx/readerreplxx.cpp @@ -1,6 +1,11 @@ /** @file readerreplxx.cpp **/ -#include "xo/reader/reader.hpp" +#include +#include +#include +#include +//#include +#include #include #include #include // for isatty @@ -8,7 +13,7 @@ // presumeably replxx assumes input is a tty // bool replxx_getline(bool interactive, - std::size_t parser_stack_size, + bool is_at_toplevel, replxx::Replxx & rx, const char ** p_input) { @@ -17,32 +22,23 @@ bool replxx_getline(bool interactive, char const * prompt = ""; if (interactive) { - if (parser_stack_size <= 1) - prompt = "> "; - else - prompt = ". "; + prompt = ((is_at_toplevel) ? "> " : ". "); } const char * input_cstr = rx.input(prompt); bool retval = (input_cstr != nullptr); - if (retval) { - //cerr << "got reval->true" << endl; + if (retval) + *p_input = input_cstr; - input = input_cstr; - - } else { - //cerr << "got retval->false" << endl; - } - - rx.history_add(input); + rx.history_add(input_cstr); return retval; } void -welcome(std::ostream& os) +welcome(std::ostream & os) { using namespace std; @@ -58,18 +54,68 @@ welcome(std::ostream& os) os << endl; } +namespace { + using xo::scm::SchematikaReader; + using xo::print::ppstate_standalone; + using xo::print::ppconfig; + using std::cout; + using std::endl; + + /** body of read-parse-print loop + * + * true -> no errors; + * false -> reader encountered error + **/ + bool + reader_seq(SchematikaReader * p_reader, + SchematikaReader::span_type * p_input, + bool eof) + { + auto [expr, remaining, error] = p_reader->read_expr(*p_input, eof); + + if (expr) { + ppconfig ppc; + ppstate_standalone pps(&cout, 0, &ppc); + + pps.prettyn(expr); + + *p_input = remaining; + + return true; + } else if (error.is_error()) { + cout << "parsing error (detected in " << error.src_function() << "): " << endl; + error.report(cout); + + /* discard stashed remainder of input line + * (for nicely-formatted errors) + */ + p_reader->reset_to_idle_toplevel(); + + return false; + } else { + /* partial expression or whitespace input, no error */ + return true; + } + } +} + int main() { using namespace replxx; - using namespace xo::scm; - using xo::scm::Expression; - using xo::print::ppconfig; - using xo::print::ppstate_standalone; - using xo::rp; - using namespace std; - using span_type = xo::scm::span; + using xo::scm::SchematikaReader; + using xo::scm::ReaderConfig; + using xo::mm::AAllocator; + using xo::mm::DX1Collector; + using xo::mm::CollectorConfig; + using xo::mm::DArena; + //using xo::print::ppconfig; + //using xo::print::ppstate_standalone; + using xo::facet::with_facet; + using xo::facet::obj; + using xo::scope; + using namespace std; bool interactive = isatty(STDIN_FILENO); @@ -82,63 +128,40 @@ main() constexpr bool c_debug_flag = false; scope log(XO_DEBUG(c_debug_flag)); - DArena expr_arena = DArena::map(ArenaConfig{ .name_ = "expr-arena", .size_ = 2*1024*1024; }); - obj expr_alloc = with_facet::mkobj(&expr_arena); - constexpr size_t c_max_stringtable_cap = 1024*1024; - SchematikaParser parser(expr_arena.config_, c_max_stringtable_cap, expr_alloc, c_debug_flag); + CollectorConfig x1_config = (CollectorConfig() + .with_size(4*1024*1024)); + DX1Collector x1(x1_config); + obj expr_alloc = with_facet::mkobj(&x1); - parser.begin_interactive_session(); + // accepting defaults too + ReaderConfig rdr_config = ReaderConfig(); - string input_str; - - bool eof = false; - - span_type input; - std::size_t parser_stack_size = 0; + SchematikaReader rdr(rdr_config, expr_alloc); + using span_type = SchematikaReader::span_type; welcome(cerr); - while (replxx_getline(interactive, parser_stack_size, rx, input_str)) { - input = span_type::from_string(input_str); + rdr.begin_interactive_session(); - while (!input.empty()) { - auto [expr, consumed, psz, error] = rdr.read_expr(input, eof); + bool eof = false; + const char * input_str; + span_type input; - if (expr) { - ppconfig ppc; - ppstate_standalone pps(&cout, 0, &ppc); + while (replxx_getline(interactive, rdr.is_at_toplevel(), rx, &input_str)) { + input = span_type::from_cstr(input_str); - pps.prettyn(expr); - } else if (error.is_error()) { - cout << "parsing error (detected in " << error.src_function() << "): " << endl; - error.report(cout); - - /* discard stashed remainder of input line - * (for nicely-formatted errors) - */ - rdr.reset_to_idle_toplevel(); - break; - } - - input = input.after_prefix(consumed); - parser_stack_size = psz; + while (!input.empty() && reader_seq(&rdr, &input, false /*eof*/)) { + ; } - /* here: input.empty() or error encountered */ - + /* here: either: + * 1. input.empty() or + * 2. error encountered + */ } - auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/); - - if (expr) { - ppconfig ppc; - ppstate_standalone pps(&cout, 0, &ppc); - - pps.prettyn>(rp(expr)); - } else if (error.is_error()) { - cout << "parsing error (detected in " << error.src_function() << "): " << endl; - error.report(cout); - } + /* reminder: eof can complete at most one token */ + reader_seq(&rdr, &input, true /*eof*/); rx.history_save("repl_history.txt"); } diff --git a/xo-reader2/include/xo/reader2/SchematikaParser.hpp b/xo-reader2/include/xo/reader2/SchematikaParser.hpp index e2667b5f..7be74aa5 100644 --- a/xo-reader2/include/xo/reader2/SchematikaParser.hpp +++ b/xo-reader2/include/xo/reader2/SchematikaParser.hpp @@ -157,6 +157,7 @@ namespace xo { using ArenaConfig = xo::mm::ArenaConfig; using AAllocator = xo::mm::AAllocator; using ppindentinfo = xo::print::ppindentinfo; + using size_type = std::size_t; public: /** create parser in initial state; diff --git a/xo-reader2/include/xo/reader2/SchematikaReader.hpp b/xo-reader2/include/xo/reader2/SchematikaReader.hpp index c60612a2..bad6c053 100644 --- a/xo-reader2/include/xo/reader2/SchematikaReader.hpp +++ b/xo-reader2/include/xo/reader2/SchematikaReader.hpp @@ -18,11 +18,11 @@ namespace xo { /** schematika expression parsed from input **/ obj expr_; - /** input span up to end of expression. + /** unconsumed portion of input span * only relevant when result type is expression. - * (otherwise treat entire input as consumed) + * (otherwise input consumed) **/ - span_type consumed_; + span_type remaining_input_; /** {src_function, error_description, input_state, error_pos} **/ TokenizerError tk_error_; @@ -36,18 +36,32 @@ namespace xo { class SchematikaReader { public: using AAllocator = xo::mm::AAllocator; + using span_type = xo::mm::span; + using size_type = std::size_t; public: SchematikaReader(const ReaderConfig & config, obj expr_alloc); + /** true iff parser is at top-level. + * false iff parser is working on incomplete expression + **/ + bool is_at_toplevel() const noexcept; + /** prepare interactive session * (allows rvalue expressions at toplevel) **/ void begin_interactive_session(); /** consume input @p input_cstr **/ - const ReaderResult & read_expr(const char * input_cstr, bool eof); + const ReaderResult & read_expr(span_type input_span, bool eof); + + /** reset to known starting point after encountering an error. + * - remainder of stashed current line. + * Necesary for well-formatted error reporting. + * - current parsing state + **/ + void reset_to_idle_toplevel(); private: /** tokenizer converts a stream of chars diff --git a/xo-reader2/src/reader2/CMakeLists.txt b/xo-reader2/src/reader2/CMakeLists.txt index 97c46c6a..37dda298 100644 --- a/xo-reader2/src/reader2/CMakeLists.txt +++ b/xo-reader2/src/reader2/CMakeLists.txt @@ -3,6 +3,8 @@ set(SELF_LIB xo_reader2) set(SELF_SRCS init_reader2.cpp + reader2_register_facets.cpp + reader2_register_types.cpp SchematikaReader.cpp ReaderConfig.cpp @@ -39,8 +41,6 @@ set(SELF_SRCS ISyntaxStateMachine_DProgressSsm.cpp IPrintable_DProgressSsm.cpp - reader2_register_facets.cpp - reader2_register_types.cpp ) xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) diff --git a/xo-reader2/src/reader2/SchematikaParser.cpp b/xo-reader2/src/reader2/SchematikaParser.cpp index fea573c7..13f9edff 100644 --- a/xo-reader2/src/reader2/SchematikaParser.cpp +++ b/xo-reader2/src/reader2/SchematikaParser.cpp @@ -7,6 +7,7 @@ #include "ParserStateMachine.hpp" #include "ParserStack.hpp" #include "DExprSeqState.hpp" +#include #include #include diff --git a/xo-reader2/src/reader2/SchematikaReader.cpp b/xo-reader2/src/reader2/SchematikaReader.cpp index f8c6a152..bfd14575 100644 --- a/xo-reader2/src/reader2/SchematikaReader.cpp +++ b/xo-reader2/src/reader2/SchematikaReader.cpp @@ -17,6 +17,12 @@ namespace xo { { } + bool + SchematikaReader::is_at_toplevel() const noexcept + { + return parser_.is_at_toplevel(); + } + void SchematikaReader::begin_interactive_session() { @@ -27,24 +33,28 @@ namespace xo { // Schematika::end_interactive_session() const ReaderResult & - SchematikaReader::read_expr(const char * input_cstr, bool eof) + SchematikaReader::read_expr(span_type input_ext, bool eof) { - if (input_cstr && *input_cstr) { + if (!input_ext.empty()) { auto [error, input] - = tokenizer_.buffer_input_line(input_cstr, - false /*!eof*/); + = tokenizer_.buffer_input_line(input_ext, eof); + // log && log(xtag("msg", "buffered input line")); // log && log(xtag("input", input)); - - while (!input.empty()) { - auto [tk, consumed, error] = tkz.scan(input); + auto [tk, consumed, error] = tokenizer_.scan(input); + + auto rem_input = input.after_prefix(consumed); if (!tk.is_valid() && error.is_error()) { - this->result_ = ReaderResult { .expr_ = obj(), - .tk_error_ = std::move(error), - .consumed_ = nullptr }; + this->result_ + = ReaderResult + { .expr_ = obj(), + .remaining_input_ = rem_input, + .tk_error_ = std::move(error) + }; + return result_; } @@ -58,7 +68,7 @@ namespace xo { // error_description :: const DString * // } // - const ParserResult & presult = parser_include_token(tk); + const ParserResult & presult = parser_.on_token(tk); if (presult.is_error()) { // tk_error { @@ -76,29 +86,50 @@ namespace xo { // // tk_error.report(cout); - this->result_ = ReaderResult { .expr = obj(), - .tk_error_ = std::move(error), - .consumed_ = nullptr }; + this->result_ + = ReaderResult + { .expr_ = obj(), + .remaining_input_ = rem_input, + .tk_error_ = std::move(error) }; + + assert(presult.error_description()); // carefully created error description, maybe - this->result.tk_error_.error_description_ = presult.error_description_; + this->result_.tk_error_ + = result_.tk_error_.with_error + (presult.error_src_fn_, + std::string + (std::string_view(*(presult.error_description())))); + return result_; + } else if (presult.is_expression()) { + this->result_ + = ReaderResult + { + .expr_ = presult.result_expr(), + .remaining_input_ = rem_input, + .tk_error_ = TokenizerError() + }; + + return result_; } - - xxxx; - } else if (error.is_error()) { - xxxx; - // error.report(cout); - break; } - input = input.after_prefix(consumed); + input = rem_input; } } - ++line_no; + this->result_ = ReaderResult(); + + return this->result_; } + void + SchematikaReader::reset_to_idle_toplevel() + { + this->tokenizer_.discard_current_line(); + this->parser_.reset_to_idle_toplevel(); + } } /*namespace scm*/ } /*namespace xo*/ diff --git a/xo-tokenizer2/example/tokenrepl/tokenrepl.cpp b/xo-tokenizer2/example/tokenrepl/tokenrepl.cpp index 1cf02244..d8ddbd7f 100644 --- a/xo-tokenizer2/example/tokenrepl/tokenrepl.cpp +++ b/xo-tokenizer2/example/tokenrepl/tokenrepl.cpp @@ -85,32 +85,33 @@ main() { { //cout << "input: " << input << endl; + auto input_ext = Tokenizer::span_type::from_cstr(input_cstr); + // reminder: input may contain multiple tokens - if (input_cstr && *input_cstr) { - auto [error, input] = tkz.buffer_input_line(input_cstr, false /*!eof*/); + auto [error, input] = tkz.buffer_input_line(input_ext, false /*!eof*/); - if (log) { - log(xtag("msg", "buffered input line")); - log(xtag("input", input)); + if (log) { + log(xtag("msg", "buffered input line")); + log(xtag("input", input)); + } + + while (!input.empty()) + { + auto [tk, consumed, error] = tkz.scan(input); + + log && log(xtag("consumed", consumed), xtag("tk", tk)); + + if (tk.is_valid()) { + cout << tk << endl; + } else if (error.is_error()) { + cout << "tokenizer error: " << endl; + + error.report(cout); + + break; } - while (!input.empty()) - { - auto [tk, consumed, error] = tkz.scan(input); - - log && log(xtag("consumed", consumed), xtag("tk", tk)); - - if (tk.is_valid()) { - cout << tk << endl; - } else if (error.is_error()) { - cout << "tokenizer error: " << endl; - error.report(cout); - - break; - } - - input = input.after_prefix(consumed); - } + input = input.after_prefix(consumed); } /* here: input.empty() or error encountered */ diff --git a/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp b/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp index 40a98cd9..69843a5a 100644 --- a/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp +++ b/xo-tokenizer2/include/xo/tokenizer2/Tokenizer.hpp @@ -129,10 +129,11 @@ namespace xo { **/ bool has_prefix() const { return !prefix_.empty(); } - /** buffer contents of input_cstr. + /** copy into buffer the contents of @p input. * May throw if buffer space exhausted **/ - std::pair buffer_input_line(const char * input_cstr, bool eof_flag); + std::pair buffer_input_line(span_type input, + bool eof_flag); /** scan for next input token, given @p input. * Note: diff --git a/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp b/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp index b8a50988..bf7702b1 100644 --- a/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp +++ b/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp @@ -32,7 +32,7 @@ namespace xo { * @p tk_start current position on entry to scanner * @p error_pos error location relative to token start **/ - TokenizerError(const char * src_function, + TokenizerError(std::string_view src_function, std::string error_description, const TkInputState & input_state, size_t error_pos) @@ -46,12 +46,20 @@ namespace xo { log && log(xtag("input_state.current_pos", input_state.current_pos()), xtag("error_pos", error_pos)); } + + TokenizerError with_error(std::string_view error_src_fn, + std::string error_msg) { + return TokenizerError(error_src_fn, + std::string(error_msg), + this->input_state_, + 0 /*error_pos*/); + } ///@} /** @defgroup tokenizer-error-access-methods **/ ///@{ - const char * src_function() const { return src_function_; } + std::string_view src_function() const { return src_function_; } const std::string & error_description() const { return error_description_; } #pragma GCC diagnostic push #ifndef __APPLE__ @@ -88,8 +96,8 @@ namespace xo { ///@{ /** source location (in tokenizer) at which error identified **/ - char const * src_function_ = nullptr; - /** static error description **/ + std::string_view src_function_; + /** error description **/ std::string error_description_; /** input state associated with this error. * Sufficient to precisely locate it with context. diff --git a/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp b/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp index 7076a95d..2784072a 100644 --- a/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp +++ b/xo-tokenizer2/src/tokenizer2/Tokenizer.cpp @@ -615,19 +615,18 @@ namespace xo { } auto - Tokenizer::buffer_input_line(const char * input_cstr, + Tokenizer::buffer_input_line(span_type input_ext, bool eof_flag) -> std::pair { scope log(XO_DEBUG(input_state_.debug_flag())); - log && log(xtag("input", input_cstr)); + log && log(xtag("input_ext", input_ext)); auto buf_input_0 = input_buffer_.input_range().hi(); - auto remainder = input_buffer_.append - (DCircularBuffer::const_span_type::from_cstr(input_cstr)); - auto remainder2 = input_buffer_.append - (DCircularBuffer::const_span_type::from_cstr("\n")); + auto remainder = input_buffer_.append(input_ext); + auto remainder2 = input_buffer_.append(span_type::from_cstr("\n")); + //(DCircularBuffer::const_span_type::from_cstr("\n")); if (!remainder.empty() || !remainder2.empty()) { throw std::runtime_error(tostr("Tokenizer::buffer_line: line too long!", @@ -636,10 +635,10 @@ namespace xo { auto buf_input_1 = input_buffer_.input_range().hi(); - span_type input = span_type(buf_input_0, - buf_input_1); + span_type input_ours = span_type(buf_input_0, + buf_input_1); - return this->input_state_.capture_current_line(input, eof_flag); + return this->input_state_.capture_current_line(input_ours, eof_flag); } auto