From dc9f29275a2d7fe484cbdface0be86a24245ac7d Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Thu, 22 Jan 2026 21:03:40 -0500 Subject: [PATCH] xo-reader2: working on example parser repl --- .../example/readerreplxx/readerreplxx.cpp | 146 ++++++++++++++++++ .../include/xo/reader2/ParserResult.hpp | 2 +- .../include/xo/reader2/ReaderConfig.hpp | 54 +++++++ .../include/xo/reader2/SchematikaParser.hpp | 2 +- .../include/xo/reader2/SchematikaReader.hpp | 70 +++++++++ xo-reader2/src/reader2/CMakeLists.txt | 3 + xo-reader2/src/reader2/ReaderConfig.cpp | 13 ++ xo-reader2/src/reader2/SchematikaReader.cpp | 105 +++++++++++++ .../include/xo/tokenizer2/TokenizerError.hpp | 2 +- .../include/xo/tokenizer2/scan_result.hpp | 4 +- 10 files changed, 396 insertions(+), 5 deletions(-) create mode 100644 xo-reader2/example/readerreplxx/readerreplxx.cpp create mode 100644 xo-reader2/include/xo/reader2/ReaderConfig.hpp create mode 100644 xo-reader2/include/xo/reader2/SchematikaReader.hpp create mode 100644 xo-reader2/src/reader2/ReaderConfig.cpp create mode 100644 xo-reader2/src/reader2/SchematikaReader.cpp diff --git a/xo-reader2/example/readerreplxx/readerreplxx.cpp b/xo-reader2/example/readerreplxx/readerreplxx.cpp new file mode 100644 index 00000000..e30ab047 --- /dev/null +++ b/xo-reader2/example/readerreplxx/readerreplxx.cpp @@ -0,0 +1,146 @@ +/** @file readerreplxx.cpp **/ + +#include "xo/reader/reader.hpp" +#include +#include +#include // for isatty + +// presumeably replxx assumes input is a tty +// +bool replxx_getline(bool interactive, + std::size_t parser_stack_size, + replxx::Replxx & rx, + const char ** p_input) +{ + using namespace std; + + char const * prompt = ""; + + if (interactive) { + if (parser_stack_size <= 1) + prompt = "> "; + else + prompt = ". 
"; + } + + const char * input_cstr = rx.input(prompt); + + bool retval = (input_cstr != nullptr); + + if (retval) { + //cerr << "got reval->true" << endl; + + input = input_cstr; + + } else { + //cerr << "got retval->false" << endl; + } + + rx.history_add(input); + + return retval; +} + +void +welcome(std::ostream& os) +{ + using namespace std; + + os << "read-eval-print loop for schematika expressions" << endl; + os << " ctrl-a/ctrl-e beginning/end of line" << endl; + os << " ctrl-u delete entire line" << endl; + os << " ctrl-k delete to end of line" << endl; + os << " meta- backward delete word" << endl; + os << " |meta-p previous command from history" << endl; + os << " |meta-n next command from history" << endl; + os << " / page through history faster" << endl; + os << " ctrl-s/ctrl-r forward/backward history search" << endl; + os << endl; +} + +int +main() +{ + using namespace replxx; + using namespace xo::scm; + using xo::scm::Expression; + using xo::print::ppconfig; + using xo::print::ppstate_standalone; + using xo::rp; + using namespace std; + + using span_type = xo::scm::span; + + bool interactive = isatty(STDIN_FILENO); + + Replxx rx; + rx.set_max_history_size(1000); + rx.history_load("repl_history.txt"); +// rx.bind_key_internal(Replxx::KEY::control('p'), "history_previous"); +// rx.bind_key_internal(Replxx::KEY::control('n'), "history_next"); + + constexpr bool c_debug_flag = false; + scope log(XO_DEBUG(c_debug_flag)); + + DArena expr_arena = DArena::map(ArenaConfig{ .name_ = "expr-arena", .size_ = 2*1024*1024; }); + obj expr_alloc = with_facet::mkobj(&expr_arena); + constexpr size_t c_max_stringtable_cap = 1024*1024; + SchematikaParser parser(expr_arena.config_, c_max_stringtable_cap, expr_alloc, c_debug_flag); + + parser.begin_interactive_session(); + + string input_str; + + bool eof = false; + + span_type input; + std::size_t parser_stack_size = 0; + + welcome(cerr); + + while (replxx_getline(interactive, parser_stack_size, rx, input_str)) { + input 
= span_type::from_string(input_str); + + while (!input.empty()) { + auto [expr, consumed, psz, error] = rdr.read_expr(input, eof); + + if (expr) { + ppconfig ppc; + ppstate_standalone pps(&cout, 0, &ppc); + + pps.prettyn(expr); + } else if (error.is_error()) { + cout << "parsing error (detected in " << error.src_function() << "): " << endl; + error.report(cout); + + /* discard stashed remainder of input line + * (for nicely-formatted errors) + */ + rdr.reset_to_idle_toplevel(); + break; + } + + input = input.after_prefix(consumed); + parser_stack_size = psz; + } + + /* here: input.empty() or error encountered */ + + } + + auto [expr, _1, _2, error] = rdr.read_expr(input, true /*eof*/); + + if (expr) { + ppconfig ppc; + ppstate_standalone pps(&cout, 0, &ppc); + + pps.prettyn>(rp(expr)); + } else if (error.is_error()) { + cout << "parsing error (detected in " << error.src_function() << "): " << endl; + error.report(cout); + } + + rx.history_save("repl_history.txt"); +} + +/* end readerreplxx.cpp */ diff --git a/xo-reader2/include/xo/reader2/ParserResult.hpp b/xo-reader2/include/xo/reader2/ParserResult.hpp index 6592c7da..492b78b0 100644 --- a/xo-reader2/include/xo/reader2/ParserResult.hpp +++ b/xo-reader2/include/xo/reader2/ParserResult.hpp @@ -67,7 +67,7 @@ namespace xo { /** pretty-printing support **/ bool pretty(const ppindentinfo & ppii) const; - private: + public: /** none|expression|error_description * * @text diff --git a/xo-reader2/include/xo/reader2/ReaderConfig.hpp b/xo-reader2/include/xo/reader2/ReaderConfig.hpp new file mode 100644 index 00000000..e7f0d7a6 --- /dev/null +++ b/xo-reader2/include/xo/reader2/ReaderConfig.hpp @@ -0,0 +1,54 @@ +/** @file ReaderConfig.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include +#include + +namespace xo { + namespace scm { + + /** @brief Configuration for SchematikaReader + **/ + struct ReaderConfig { + using CircularBufferConfig = xo::mm::CircularBufferConfig; + using ArenaConfig = 
xo::mm::ArenaConfig; + using size_t = std::size_t; + + /** tokenizer circular buffer config **/ + CircularBufferConfig tk_buffer_config_ {.name_ = "tk-buffer", + .max_capacity_ = 2*1024*1024, + .hugepage_z_ = 2*1024*1024, + .threshold_move_efficiency_ = 50.0, + .max_captured_span_ = 128 }; + /** debug flag for schematika tokenizer **/ + bool tk_debug_flag_ = false; + /** arena configuration for parser stack **/ + ArenaConfig parser_arena_config_ { .name_ = "parer-arena", + .size_ = 2*1024*1024, + .hugepage_z_ = 2*1024*1024, + .store_header_flag_ = false, + .header_{}, + .debug_flag_ = false }; + /** max size (in bytes) of stringtable **/ + size_t max_stringtable_cap_ = 64*1024; + /** debug flag for schematika parser **/ + bool parser_debug_flag_ = false; +#ifdef NOT_YET + /** arena configuration for output expressions **/ + ArenaConfig expr_arena_config_ { .name_ = "expr-arena", + .size_ = 2*1024*1024, + .hugepage_z_ = 2*1024*1024, + .store_header_flag_ = false, + .header_{}, + .debug_flag_ = false }; +#endif + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ReaderConfig.hpp */ diff --git a/xo-reader2/include/xo/reader2/SchematikaParser.hpp b/xo-reader2/include/xo/reader2/SchematikaParser.hpp index aecc3c9a..e2667b5f 100644 --- a/xo-reader2/include/xo/reader2/SchematikaParser.hpp +++ b/xo-reader2/include/xo/reader2/SchematikaParser.hpp @@ -162,7 +162,7 @@ namespace xo { /** create parser in initial state; * parser is ready to receive tokens via @ref include_token * - * @p config arena configuration for parser memory + * @p config arena configuration for parser stack * @p expr_alloc allocator for schematika expressions. * Probably shared with execution. 
* @p debug_flag true to enable debug logging diff --git a/xo-reader2/include/xo/reader2/SchematikaReader.hpp b/xo-reader2/include/xo/reader2/SchematikaReader.hpp new file mode 100644 index 00000000..c60612a2 --- /dev/null +++ b/xo-reader2/include/xo/reader2/SchematikaReader.hpp @@ -0,0 +1,70 @@ +/** @file SchematikaReader.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "ReaderConfig.hpp" +#include "SchematikaParser.hpp" +#include + +namespace xo { + namespace scm { + struct ReaderResult { + using span_type = xo::mm::span; + + bool is_tk_error() const { return tk_error_.is_error(); } + + /** schematika expression parsed from input **/ + obj expr_; + /** input span up to end of expression. + * only relevant when result type is expression. + * (otherwise treat entire input as consumed) + **/ + span_type consumed_; + + /** {src_function, error_description, input_state, error_pos} **/ + TokenizerError tk_error_; + }; + + /** @class SchematikaReader + * @brief Pipeline comprising Schematika tokenizer and parser + * + * Consumes text; produces expressions + **/ + class SchematikaReader { + public: + using AAllocator = xo::mm::AAllocator; + + public: + SchematikaReader(const ReaderConfig & config, + obj expr_alloc); + + /** prepare interactive session + * (allows rvalue expressions at toplevel) + **/ + void begin_interactive_session(); + + /** consume input @p input_cstr **/ + const ReaderResult & read_expr(const char * input_cstr, bool eof); + + private: + /** tokenizer converts a stream of chars + * to a stream of lexical tokens + **/ + Tokenizer tokenizer_; + + /** parser converts a stream of tokens + * to a stream of expressions + **/ + SchematikaParser parser_; + + /** current output from reader **/ + ReaderResult result_; + + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end SchematikaReader.hpp */ diff --git a/xo-reader2/src/reader2/CMakeLists.txt b/xo-reader2/src/reader2/CMakeLists.txt index 232174ef..97c46c6a 100644 --- 
a/xo-reader2/src/reader2/CMakeLists.txt +++ b/xo-reader2/src/reader2/CMakeLists.txt @@ -4,6 +4,9 @@ set(SELF_LIB xo_reader2) set(SELF_SRCS init_reader2.cpp + SchematikaReader.cpp + ReaderConfig.cpp + SchematikaParser.cpp ParserStateMachine.cpp ParserStack.cpp diff --git a/xo-reader2/src/reader2/ReaderConfig.cpp b/xo-reader2/src/reader2/ReaderConfig.cpp new file mode 100644 index 00000000..7db5fe59 --- /dev/null +++ b/xo-reader2/src/reader2/ReaderConfig.cpp @@ -0,0 +1,13 @@ +/** @file ReaderConfig.cpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include "ReaderConfig.hpp" + +namespace xo { + namespace scm { + } +} /*namespace xo*/ + +/* end ReaderConfig.cpp */ diff --git a/xo-reader2/src/reader2/SchematikaReader.cpp b/xo-reader2/src/reader2/SchematikaReader.cpp new file mode 100644 index 00000000..f8c6a152 --- /dev/null +++ b/xo-reader2/src/reader2/SchematikaReader.cpp @@ -0,0 +1,105 @@ +/** @file SchematikaReader.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "SchematikaReader.hpp" + +namespace xo { + namespace scm { + SchematikaReader::SchematikaReader(const ReaderConfig & config, + obj expr_alloc) + : tokenizer_{config.tk_buffer_config_, config.tk_debug_flag_}, + parser_{config.parser_arena_config_, + config.max_stringtable_cap_, + expr_alloc, + config.parser_debug_flag_} + { + } + + void + SchematikaReader::begin_interactive_session() + { + parser_.begin_interactive_session(); + } + + // TODO: + // Schematika::end_interactive_session() + + const ReaderResult & + SchematikaReader::read_expr(const char * input_cstr, bool eof) + { + if (input_cstr && *input_cstr) { + auto [error, input] + = tokenizer_.buffer_input_line(input_cstr, + false /*!eof*/); + // log && log(xtag("msg", "buffered input line")); + // log && log(xtag("input", input)); + + + + while (!input.empty()) { + auto [tk, consumed, error] = tkz.scan(input); + + if (!tk.is_valid() && error.is_error()) { + this->result_ = ReaderResult { .expr_ = obj(), + .tk_error_ = 
std::move(error), + .consumed_ = nullptr }; + return result_; + } + + // log && log(xtag("consumed", consumed), xtag("tk", tk)); + + if (tk.is_valid()) { + // presult { + // result_type :: parser_result_type = none|expression|error + // result_expr :: obj + // error_src_function :: string_view + // error_description :: const DString * + // } + // + const ParserResult & presult = parser_include_token(tk); + + if (presult.is_error()) { + // tk_error { + // src_function :: const char * + // error_description :: string + // input_state { + // current_line :: span + // tk_start :: size_t + // current_pos :: size_t + // whitespace :: size_t + // debug_flag :: bool + // } + // error_pos :: size_t + // } + // + // tk_error.report(cout); + + this->result_ = ReaderResult { .expr = obj(), + .tk_error_ = std::move(error), + .consumed_ = nullptr }; + + // carefully created error description, maybe + this->result.tk_error_.error_description_ = presult.error_description_; + + } + + xxxx; + } else if (error.is_error()) { + xxxx; + // error.report(cout); + break; + } + + input = input.after_prefix(consumed); + } + } + + ++line_no; + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end SchematikaReader.cpp */ diff --git a/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp b/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp index a1cb99ee..b8a50988 100644 --- a/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp +++ b/xo-tokenizer2/include/xo/tokenizer2/TokenizerError.hpp @@ -99,7 +99,7 @@ namespace xo { size_t error_pos_ = 0; ///@} - }; /*error_token*/ + }; inline std::ostream & operator<< (std::ostream & os, diff --git a/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp b/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp index 249154f1..45718c5c 100644 --- a/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp +++ b/xo-tokenizer2/include/xo/tokenizer2/scan_result.hpp @@ -28,9 +28,9 @@ namespace xo { **/ class scan_result { public: - using CharT = char; + //using 
CharT = char; using token_type = Token; - using span_type = xo::mm::span; + using span_type = xo::mm::span; using error_type = TokenizerError; using input_state_type = TkInputState;