From 7ee57309b567358edc33759c119006a6dce72d3c Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Sun, 18 Jan 2026 17:59:46 -0500 Subject: [PATCH] xo-reader2 scaffold (fomo+arena version of xo-reader/) [WIP] --- CMakeLists.txt | 42 ++++-- DESIGN.md | 12 ++ cmake/xo_reader2Config.cmake.in | 4 +- idl/ISyntaxStateMachine_DExprSeqState.json5 | 13 ++ idl/SyntaxStateMachine.json5 | 56 ++++++++ include/xo/reader2/DExprSeqState.hpp | 76 ++++++++++ include/xo/reader2/ExpressionParser.hpp | 80 +++++++++++ include/xo/reader2/ParserResult.hpp | 52 +++++++ include/xo/reader2/ParserStack.hpp | 47 ++++++ include/xo/reader2/ParserStateMachine.hpp | 115 +++++++++++++++ include/xo/reader2/Reader.hpp | 25 ++++ include/xo/reader2/SyntaxStateMachine.hpp | 22 +++ .../xo/reader2/ssm/ASyntaxStateMachine.hpp | 78 ++++++++++ .../reader2/ssm/ISyntaxStateMachine_Any.hpp | 87 ++++++++++++ .../ssm/ISyntaxStateMachine_DExprSeqState.hpp | 64 +++++++++ .../reader2/ssm/ISyntaxStateMachine_Xfer.hpp | 88 ++++++++++++ .../xo/reader2/ssm/RSyntaxStateMachine.hpp | 85 +++++++++++ include/xo/reader2/syntaxstatetype.hpp | 38 +++++ src/reader2/CMakeLists.txt | 30 ++++ src/reader2/DExprSeqState.cpp | 101 +++++++++++++ src/reader2/ISyntaxStateMachine_Any.cpp | 47 ++++++ .../ISyntaxStateMachine_DExprSeqState.cpp | 39 +++++ src/reader2/ParserResult.cpp | 32 +++++ src/reader2/ParserStack.cpp | 28 ++++ src/reader2/ParserStateMachine.cpp | 134 ++++++++++++++++++ 25 files changed, 1381 insertions(+), 14 deletions(-) create mode 100644 DESIGN.md create mode 100644 idl/ISyntaxStateMachine_DExprSeqState.json5 create mode 100644 idl/SyntaxStateMachine.json5 create mode 100644 include/xo/reader2/DExprSeqState.hpp create mode 100644 include/xo/reader2/ExpressionParser.hpp create mode 100644 include/xo/reader2/ParserResult.hpp create mode 100644 include/xo/reader2/ParserStack.hpp create mode 100644 include/xo/reader2/ParserStateMachine.hpp create mode 100644 include/xo/reader2/Reader.hpp create mode 100644 
include/xo/reader2/SyntaxStateMachine.hpp create mode 100644 include/xo/reader2/ssm/ASyntaxStateMachine.hpp create mode 100644 include/xo/reader2/ssm/ISyntaxStateMachine_Any.hpp create mode 100644 include/xo/reader2/ssm/ISyntaxStateMachine_DExprSeqState.hpp create mode 100644 include/xo/reader2/ssm/ISyntaxStateMachine_Xfer.hpp create mode 100644 include/xo/reader2/ssm/RSyntaxStateMachine.hpp create mode 100644 include/xo/reader2/syntaxstatetype.hpp create mode 100644 src/reader2/CMakeLists.txt create mode 100644 src/reader2/DExprSeqState.cpp create mode 100644 src/reader2/ISyntaxStateMachine_Any.cpp create mode 100644 src/reader2/ISyntaxStateMachine_DExprSeqState.cpp create mode 100644 src/reader2/ParserResult.cpp create mode 100644 src/reader2/ParserStack.cpp create mode 100644 src/reader2/ParserStateMachine.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a8508472..30732417 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,20 +22,36 @@ add_definitions(${PROJECT_CXX_FLAGS}) #add_subdirectory(utest) -# ---------------------------------------------------------------- -# header-only library +# note: manual target; generated code committed to git +xo_add_genfacet( + TARGET xo-reader2-facet-syntaxstatemachine + FACET SyntaxStateMachine + INPUT idl/SyntaxStateMachine.json5 + OUTPUT_HPP_DIR include/xo/reader2 + OUTPUT_IMPL_SUBDIR ssm + OUTPUT_CPP_DIR src/reader2 + ) + +# note: manual target; generated code committed to git +xo_add_genfacetimpl( + TARGET xo-reader2-facetimpl-syntaxstatemachine-exprseqstate + FACET_PKG xo_reader2 + FACET SyntaxStateMachine + REPR ExprSeqState + INPUT idl/ISyntaxStateMachine_DExprSeqState.json5 + OUTPUT_HPP_DIR include/xo/reader2 + OUTPUT_IMPL_SUBDIR ssm + OUTPUT_CPP_DIR src/reader2 +) + +# ---------------------------------------------------------------- +# shared library + +add_subdirectory(src/reader2) + +# ---------------------------------------------------------------- +# cmake helper (for external xo-reader2 users) 
-set(SELF_LIB xo_reader2) -xo_add_headeronly_library(${SELF_LIB}) -xo_install_library4(${SELF_LIB} ${PROJECT_NAME}Targets) xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets) -# ---------------------------------------------------------------- -# input dependencies -# -# NOTE: dependency set here must be kept consistent with -# xo-reader2/cmake/xo_reader2Config.cmake.in - -#xo_headeronly_dependency(${SELF_LIB} xo_flatstring) - # end CMakeLists.txt diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 00000000..69596c44 --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,12 @@ +Uses arena allocators for fast+efficient parsing. + +Composition of nested state machines. + +## SyntaxStateMachine + +a state machine dedicated to some particular Schematika syntax. +Examples: if-expression, type declaration, function call + +## DExprSeqState + +top-level expression sequence diff --git a/cmake/xo_reader2Config.cmake.in b/cmake/xo_reader2Config.cmake.in index b5c3cd5c..2b36efff 100644 --- a/cmake/xo_reader2Config.cmake.in +++ b/cmake/xo_reader2Config.cmake.in @@ -6,7 +6,9 @@ include(CMakeFindDependencyMacro) # must coordinate with xo_dependency() calls # in CMakeLists.txt # -#find_dependency(xo_flatstring) +find_dependency(xo_gc) +find_dependency(xo_tokenizer2) +find_dependency(xo_expression2) include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") check_required_components("@PROJECT_NAME@") diff --git a/idl/ISyntaxStateMachine_DExprSeqState.json5 b/idl/ISyntaxStateMachine_DExprSeqState.json5 new file mode 100644 index 00000000..0d0cdcac --- /dev/null +++ b/idl/ISyntaxStateMachine_DExprSeqState.json5 @@ -0,0 +1,13 @@ +{ + mode: "implementation", + includes: [ "\"SyntaxStateMachine.hpp\"", + "\"ssm/ISyntaxStateMachine_Xfer.hpp\"" ], + local_types: [ ], + namespace1: "xo", + namespace2: "scm", + facet_idl: "idl/SyntaxStateMachine.json5", + brief: "provide ASyntaxStateMachine interface for DExprSeqState", + using_doxygen: true, + repr: 
"DExprSeqState", + doc: [ "implement ASyntaxStateMachine for DExprSeqState" ], +} diff --git a/idl/SyntaxStateMachine.json5 b/idl/SyntaxStateMachine.json5 new file mode 100644 index 00000000..0023a16e --- /dev/null +++ b/idl/SyntaxStateMachine.json5 @@ -0,0 +1,56 @@ +{ + mode: "facet", + // includes in ASyntaxStateMachine.hpp + includes: [ + "\"ParserStateMachine.hpp\"", + "\"syntaxstatetype.hpp\"", + "", + ], + // extra includes in SyntaxStateMachine.hpp, if any + user_hpp_includes: [], + namespace1: "xo", + namespace2: "scm", + // text after includes, before ASyntaxStateMachine + pretext: ["// {pretex} here"], + facet: "SyntaxStateMachine", + detail_subdir: "ssm", + brief: "specialized state machine for parsing some particular schematika syntax", + using_doxygen: true, + doc: [ + "Assistant to schematika parser dedicated to particular syntax" + ], + types: [ + // { name: string, doc: [ string ], definition: string }, + ], + const_methods: [ + { + name: "ssm_type", + doc: ["identify a type of syntax state machine"], + return_type: "syntaxstatetype", + args: [], + const: true, + noexcept: true, + attributes: [], + }, + { + name: "get_expect_str", + doc: ["text describing expected/allowed input to this ssm in current state"], + return_type: "std::string_view", + args: [], + const: true, + noexcept: true, + attributes: [], + }, + ], + nonconst_methods: [ + { + name: "on_if_token", + doc: ["update state machine for incoming if-keyword-token @p tk"], + return_type: "void", + args: [ + {type: "const Token &", name: "tk"}, + {type: "ParserStateMachine *", name: "p_psm"}, + ], + }, + ], +} diff --git a/include/xo/reader2/DExprSeqState.hpp b/include/xo/reader2/DExprSeqState.hpp new file mode 100644 index 00000000..f1faed86 --- /dev/null +++ b/include/xo/reader2/DExprSeqState.hpp @@ -0,0 +1,76 @@ +/** @file DExprSeqState.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "ParserStateMachine.hpp" +#include "SyntaxStateMachine.hpp" +#include 
"syntaxstatetype.hpp" +#include + +namespace xo { + namespace scm { + enum class exprseqtype { + /** toplevel interactive sequence. + * allows: rvalue expressions + **/ + toplevel_interactive, + /** toplevel non-interactive sequence. + * allows: + **/ + toplevel_batch, + /** counts number of valid enums **/ + N + }; + + /** @class DExprSeqState + * @brief state machine for parsing a sequence of expressions + * + * Similar to exprseq_xs in xo-expression + **/ + class DExprSeqState { + public: + using AAllocator = xo::mm::AAllocator; + + public: + explicit DExprSeqState(exprseqtype ty); + + /** start interactive top-level session **/ + static void start_interactive(obj mm, + ParserStateMachine * p_psm); + /** start non-interactive top-level session **/ + static void start_batch(obj mm, + ParserStateMachine * p_psm); + + public: + /** @defgroup scm-exprseq-ssm-facet syntaxstatemachine facet methods **/ + ///@{ + + /** identifies the ssm implemented here **/ + syntaxstatetype ssm_type() const noexcept; + + /** text describing expected/allowed input to this ssm in current state. + * Intended to drive error messages + **/ + std::string_view get_expect_str() const noexcept; + + /** update state for this syntax on incoming token @p tk, + * overall parser state in @p p_psm + **/ + void on_if_token(const Token & tk, ParserStateMachine * p_psm); + + ///@} + + private: + /** sequence type. accept rvalue expressions when + * this is toplevel_interactive. + * Always accept definitions and declarations. 
+ **/ + exprseqtype seqtype_; + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end DExprSeqState.hpp */ diff --git a/include/xo/reader2/ExpressionParser.hpp b/include/xo/reader2/ExpressionParser.hpp new file mode 100644 index 00000000..e59464b5 --- /dev/null +++ b/include/xo/reader2/ExpressionParser.hpp @@ -0,0 +1,80 @@ +/** @file ExpressionParser.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "ExprState.hpp" +#include +#include +#include + +namespace xo { + namespace scm { + /** @class ExpressionParser + * @brief Assemble Schematika expressions from token sequences + * + * Parser represents each partially assembled expression by + * an ExprState object. + * Expressions form a tree: + * each expression belongs to at most one parent. + * + **/ + class ExpressionParser { + public: + void push_exprstate(obj xstate); + + private: + /* TODO: + * ASymbolTable + * DLocalSymtab + * DGlobalSymtab + * + * Will also need + * DVariable + * DLambda + * + * For DGlobalSymtab perhaps use DArenaHashMap. + * May also want to use DArenaHashMap+DArena to intern strings + * + * Also: + * TypeUnifier + */ + + /** Arena for internal parsing stack. + * Must be owned exclusively because destructively + * modified as parser completes parsing of each sub-expression + * + * Contents will be a stack of ExprState instances + **/ + DArena parser_alloc_; + +#ifdef NOT_YET + /** Arena for internal environment stack. + * This represents just nesting for environments. + * Details for each frame survive parsing and are + * stored in @ref expr_alloc_. + * Maybe that means we don't need env_alloc_ + **/ + DArena env_alloc_; +#endif + + /** Allocator for parsed expressions. + * Information available during subsequent execution + * (whether compiling or interpreting) must be stored here. + * + * Also use this allocator for error messages arising + * during parsing + * + * Memory use patterns for executions are not predictable, + * and require garbage collection, e.g. DX1Collector. 
+ * + * May alternatively be able to use DArena in a compile-only + * scenario, where top-level Expressions can be discarded + * once compiled. + **/ + obj expr_alloc_; + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ExpressionParser.hpp */ diff --git a/include/xo/reader2/ParserResult.hpp b/include/xo/reader2/ParserResult.hpp new file mode 100644 index 00000000..3aadbdae --- /dev/null +++ b/include/xo/reader2/ParserResult.hpp @@ -0,0 +1,52 @@ +/** @file ParserResult.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include +#include +#include + +namespace xo { + namespace scm { + enum class parser_result_type { + /** no result yet (no input or incomplete expression) **/ + none, + /** emit expression **/ + expression, + /** emit parsing error **/ + error, + N + }; + + class ParserResult { + public: + ParserResult() = default; + ParserResult(parser_result_type type, + obj expr, + std::string_view error_src_fn, + const DString * error_description); + + /** create ParserResult for a parsing error. 
+ * Reporting detailed message @p errmsg + * from syntax state machine @p ssm + **/ + static ParserResult error(std::string_view ssm, + const DString * errmsg); + + parser_result_type result_type() const { return result_type_; } + obj result_expr() const { return result_expr_; } + const DString * error_description() const { return error_description_; } + + private: + parser_result_type result_type_ = parser_result_type::none; + obj result_expr_; + std::string_view error_src_fn_; + const DString * error_description_ = nullptr; + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserResult.hpp */ diff --git a/include/xo/reader2/ParserStack.hpp b/include/xo/reader2/ParserStack.hpp new file mode 100644 index 00000000..424be42b --- /dev/null +++ b/include/xo/reader2/ParserStack.hpp @@ -0,0 +1,47 @@ +/** @file ParserStack.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "SyntaxStateMachine.hpp" +#include +#include + +namespace xo { + namespace scm { + + /** @brief A stack of expression state machines + * + * Each state machine is dedicated to a particular syntax instance. + * The innermost machine is in @ref ssm_; machines for surrounding expressions + * are in progressively removed frames reached via parent links. + **/ + class ParserStack { + public: + using AAllocator = xo::mm::AAllocator; + + public: + ParserStack(obj ssm, ParserStack * parent); + + /** create new top of stack for syntax @p ssm, using memory from @p mm. 
+ * previous stack given by @p parent + **/ + ParserStack * push(obj mm, + obj ssm); + + obj top() const noexcept { return ssm_; } + ParserStack * parent() const noexcept { return parent_; } + + private: + /** top of parsing stack: always non-null **/ + obj ssm_; + /** remainder of parsing stack excluding top **/ + ParserStack * parent_ = nullptr; + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserStack.hpp */ diff --git a/include/xo/reader2/ParserStateMachine.hpp b/include/xo/reader2/ParserStateMachine.hpp new file mode 100644 index 00000000..339fa029 --- /dev/null +++ b/include/xo/reader2/ParserStateMachine.hpp @@ -0,0 +1,115 @@ +/** @file ParserStateMachine.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "ParserResult.hpp" +#include +#include +#include + +namespace xo { + namespace scm { + // defined in ssm/ASyntaxStateMachine.hpp, but + // including here would create include cycle + // + class ASyntaxStateMachine; + + // note: load-bearing to forward-declare ParserStack, + // because ASyntaxStateMachine.hpp includes ParserStateMachine.hpp; + // before obj is defined. + class ParserStack; + + /** @brief State machine embodying Schematika parser + **/ + class ParserStateMachine { + public: + using AAllocator = xo::mm::AAllocator; + using ArenaConfig = xo::mm::ArenaConfig; + using DArena = xo::mm::DArena; + + public: + ParserStateMachine(const ArenaConfig & config); + + /** @defgroup scm-parserstatemachine-bookkeeping bookkeeping methods **/ + ///@{ + + /** push syntax @p ssm onto @ref stack_ **/ + void push_ssm(obj ssm); + + ///@} + + /** @defgroup scm-parserstatemachine-inputmethods input methods **/ + ///@{ + + /** update state to respond to input token @p tk. 
+ * record output (if any) in @ref result_ + **/ + void on_token(const Token & tk); + + /** update state for incoming if-token @p tk **/ + void on_if_token(const Token & tk); + + ///@} + + /** @defgroup scm-parserstatemachine-error-entrypoints error entry points **/ + ///@{ + + /** capture error message @p errmsg from @p ssm_name, + * as current state machine output. + * + * @p errmsg will have been allocated from the @p expr_alloc_ allocator + **/ + void capture_error(std::string_view ssm_name, + const DString * errmsg); + + /** report illegal input from syntax state machine @p ssm_name + * recognized on input token @p tk. @p expect_str describes + * expected input in that state + **/ + void illegal_input_on_token(std::string_view ssm_name, + const Token & tk, + std::string_view expect_str); + + ///@} + + private: + /** Arena for internal parsing stack. + * Must be owned exclusively because destructively + * modified as parser completes parsing of each sub-expression + * + * Contents will be a stack of ExprState instances + **/ + DArena parser_alloc_; + + /** parser stack. Memory from @ref parser_alloc_ **/ + ParserStack * stack_ = nullptr; + + /** Allocator for parsed expressions. + * Information available during subsequent execution + * (whether compiling or interpreting) must be stored here. + * + * Also use this allocator for error messages arising + * during parsing + * + * Memory use patterns for executions are not predictable, + * and benefit from garbage collection, e.g. DX1Collector. + * + * May alternatively be able to use DArena in a compile-only + * scenario, where top-level Expressions can be discarded + * once compiled. 
+ **/ + obj expr_alloc_; + + /** current output from parser **/ + ParserResult result_; + + /** true to enable debug output **/ + bool debug_flag_ = false; + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserStateMachine.hpp */ diff --git a/include/xo/reader2/Reader.hpp b/include/xo/reader2/Reader.hpp new file mode 100644 index 00000000..9ae65d61 --- /dev/null +++ b/include/xo/reader2/Reader.hpp @@ -0,0 +1,25 @@ +/** @file Reader.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include + +namespace xo { + namespace scm { + /** @class Reader + * @brief Assemble Schematika expressions from lexical tokens + **/ + class Reader { + public: + private: + /** tokenizer: assembles Schematika tokens from text **/ + Tokenizer tokenizer_; + + /** parser: assemble Schematika expressions from token sequences **/ + ExpressionParser parser_; + }; + } /*namespace scm*/ +} /*namespace xo*/ + +/* end Reader.hpp */ diff --git a/include/xo/reader2/SyntaxStateMachine.hpp b/include/xo/reader2/SyntaxStateMachine.hpp new file mode 100644 index 00000000..deda9e1d --- /dev/null +++ b/include/xo/reader2/SyntaxStateMachine.hpp @@ -0,0 +1,22 @@ +/** @file SyntaxStateMachine.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/SyntaxStateMachine.json5] + * 2. jinja2 template for facet .hpp file: + * [facet.hpp.j2] + * 3. 
idl for facet methods + * [idl/SyntaxStateMachine.json5] + **/ + +#pragma once + +#include "ssm/ASyntaxStateMachine.hpp" +#include "ssm/ISyntaxStateMachine_Any.hpp" +#include "ssm/ISyntaxStateMachine_Xfer.hpp" +#include "ssm/RSyntaxStateMachine.hpp" + + +/* end SyntaxStateMachine.hpp */ \ No newline at end of file diff --git a/include/xo/reader2/ssm/ASyntaxStateMachine.hpp b/include/xo/reader2/ssm/ASyntaxStateMachine.hpp new file mode 100644 index 00000000..a519b246 --- /dev/null +++ b/include/xo/reader2/ssm/ASyntaxStateMachine.hpp @@ -0,0 +1,78 @@ +/** @file ASyntaxStateMachine.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/SyntaxStateMachine.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [abstract_facet.hpp.j2] + * 3. idl for facet methods + * [idl/SyntaxStateMachine.json5] + **/ + +#pragma once + +// includes (via {facet_includes}) +#include "ParserStateMachine.hpp" +#include "syntaxstatetype.hpp" +#include +#include +#include +#include + +// {pretex} here + +namespace xo { +namespace scm { + +using Copaque = const void *; +using Opaque = void *; + +/** +Assistant to schematika parser dedicated to particular syntax +**/ +class ASyntaxStateMachine { +public: + /** @defgroup scm-syntaxstatemachine-type-traits **/ + ///@{ + // types + /** integer identifying a type **/ + using typeseq = xo::facet::typeseq; + using Copaque = const void *; + using Opaque = void *; + ///@} + + /** @defgroup scm-syntaxstatemachine-methods **/ + ///@{ + // const methods + /** RTTI: unique id# for actual runtime data representation **/ + virtual typeseq _typeseq() const noexcept = 0; + /** identify a type of syntax state machine **/ + virtual syntaxstatetype ssm_type(Copaque data) const noexcept = 0; + /** text describing expected/allowed input to this ssm in current state **/ + virtual std::string_view get_expect_str(Copaque data) const 
noexcept = 0; + + // nonconst methods + /** update state machine for incoming if-keyword-token @p tk **/ + virtual void on_if_token(Opaque data, const Token & tk, ParserStateMachine * p_psm) = 0; + ///@} +}; /*ASyntaxStateMachine*/ + +/** Implementation ISyntaxStateMachine_DRepr of ASyntaxStateMachine for state DRepr + * should provide a specialization: + * + * template <> + * struct xo::facet::FacetImplementation { + * using Impltype = ISyntaxStateMachine_DRepr; + * }; + * + * then ISyntaxStateMachine_ImplType --> ISyntaxStateMachine_DRepr + **/ +template +using ISyntaxStateMachine_ImplType = xo::facet::FacetImplType; + +} /*namespace scm*/ +} /*namespace xo*/ + +/* ASyntaxStateMachine.hpp */ \ No newline at end of file diff --git a/include/xo/reader2/ssm/ISyntaxStateMachine_Any.hpp b/include/xo/reader2/ssm/ISyntaxStateMachine_Any.hpp new file mode 100644 index 00000000..2d5dd0c3 --- /dev/null +++ b/include/xo/reader2/ssm/ISyntaxStateMachine_Any.hpp @@ -0,0 +1,87 @@ +/** @file ISyntaxStateMachine_Any.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/SyntaxStateMachine.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3. 
idl for facet methods + * [idl/SyntaxStateMachine.json5] + **/ + +#pragma once + +#include "ASyntaxStateMachine.hpp" +#include + +namespace xo { namespace scm { class ISyntaxStateMachine_Any; } } + +namespace xo { +namespace facet { + +template <> +struct FacetImplementation +{ + using ImplType = xo::scm::ISyntaxStateMachine_Any; +}; + +} +} + +namespace xo { +namespace scm { + + /** @class ISyntaxStateMachine_Any + * @brief ASyntaxStateMachine implementation for empty variant instance + **/ + class ISyntaxStateMachine_Any : public ASyntaxStateMachine { + public: + /** @defgroup scm-syntaxstatemachine-any-type-traits **/ + ///@{ + + /** integer identifying a type **/ + using typeseq = xo::facet::typeseq; + + ///@} + /** @defgroup scm-syntaxstatemachine-any-methods **/ + ///@{ + + const ASyntaxStateMachine * iface() const { return std::launder(this); } + + // from ASyntaxStateMachine + + // const methods + typeseq _typeseq() const noexcept override { return s_typeseq; } + [[noreturn]] syntaxstatetype ssm_type(Copaque) const noexcept override { _fatal(); } + [[noreturn]] std::string_view get_expect_str(Copaque) const noexcept override { _fatal(); } + + // nonconst methods + [[noreturn]] void on_if_token(Opaque, const Token &, ParserStateMachine *) override; + + ///@} + + private: + /** @defgroup scm-syntaxstatemachine-any-private-methods **/ + ///@{ + + [[noreturn]] static void _fatal(); + + ///@} + + public: + /** @defgroup scm-syntaxstatemachine-any-member-vars **/ + ///@{ + + static typeseq s_typeseq; + static bool _valid; + + ///@} + }; + +} /*namespace scm */ +} /*namespace xo */ + +/* ISyntaxStateMachine_Any.hpp */ \ No newline at end of file diff --git a/include/xo/reader2/ssm/ISyntaxStateMachine_DExprSeqState.hpp b/include/xo/reader2/ssm/ISyntaxStateMachine_DExprSeqState.hpp new file mode 100644 index 00000000..b1d889b1 --- /dev/null +++ b/include/xo/reader2/ssm/ISyntaxStateMachine_DExprSeqState.hpp @@ -0,0 +1,64 @@ +/** @file 
ISyntaxStateMachine_DExprSeqState.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/ISyntaxStateMachine_DExprSeqState.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_repr.hpp.j2] + * 3. idl for facet methods + * [idl/ISyntaxStateMachine_DExprSeqState.json5] + **/ + +#pragma once + +#include "SyntaxStateMachine.hpp" +#include "SyntaxStateMachine.hpp" +#include "ssm/ISyntaxStateMachine_Xfer.hpp" +#include "DExprSeqState.hpp" + +namespace xo { namespace scm { class ISyntaxStateMachine_DExprSeqState; } } + +namespace xo { + namespace facet { + template <> + struct FacetImplementation + { + using ImplType = xo::scm::ISyntaxStateMachine_Xfer + ; + }; + } +} + +namespace xo { + namespace scm { + /** @class ISyntaxStateMachine_DExprSeqState + **/ + class ISyntaxStateMachine_DExprSeqState { + public: + /** @defgroup scm-syntaxstatemachine-dexprseqstate-type-traits **/ + ///@{ + using Copaque = xo::scm::ASyntaxStateMachine::Copaque; + using Opaque = xo::scm::ASyntaxStateMachine::Opaque; + ///@} + /** @defgroup scm-syntaxstatemachine-dexprseqstate-methods **/ + ///@{ + // const methods + /** identify a type of syntax state machine **/ + static syntaxstatetype ssm_type(const DExprSeqState & self) noexcept; + /** text describing expected/allowed input to this ssm in current state **/ + static std::string_view get_expect_str(const DExprSeqState & self) noexcept; + + // non-const methods + /** update state machine for incoming if-keyword-token @p tk **/ + static void on_if_token(DExprSeqState & self, const Token & tk, ParserStateMachine * p_psm); + ///@} + }; + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end */ \ No newline at end of file diff --git a/include/xo/reader2/ssm/ISyntaxStateMachine_Xfer.hpp b/include/xo/reader2/ssm/ISyntaxStateMachine_Xfer.hpp new file mode 100644 index 00000000..c3aec83e --- /dev/null +++ 
b/include/xo/reader2/ssm/ISyntaxStateMachine_Xfer.hpp @@ -0,0 +1,88 @@ +/** @file ISyntaxStateMachine_Xfer.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/SyntaxStateMachine.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3. idl for facet methods + * [idl/SyntaxStateMachine.json5] + **/ + +#pragma once + +#include "ParserStateMachine.hpp" +#include "syntaxstatetype.hpp" +#include + +namespace xo { +namespace scm { + /** @class ISyntaxStateMachine_Xfer + **/ + template + class ISyntaxStateMachine_Xfer : public ASyntaxStateMachine { + public: + /** @defgroup scm-syntaxstatemachine-xfer-type-traits **/ + ///@{ + /** actual implementation (not generated; often delegates to DRepr) **/ + using Impl = ISyntaxStateMachine_DRepr; + /** integer identifying a type **/ + using typeseq = ASyntaxStateMachine::typeseq; + ///@} + + /** @defgroup scm-syntaxstatemachine-xfer-methods **/ + ///@{ + + static const DRepr & _dcast(Copaque d) { return *(const DRepr *)d; } + static DRepr & _dcast(Opaque d) { return *(DRepr *)d; } + + // from ASyntaxStateMachine + + // const methods + typeseq _typeseq() const noexcept override { return s_typeseq; } + syntaxstatetype ssm_type(Copaque data) const noexcept override { + return I::ssm_type(_dcast(data)); + } + std::string_view get_expect_str(Copaque data) const noexcept override { + return I::get_expect_str(_dcast(data)); + } + + // non-const methods + void on_if_token(Opaque data, const Token & tk, ParserStateMachine * p_psm) override { + return I::on_if_token(_dcast(data), tk, p_psm); + } + + ///@} + + private: + using I = Impl; + + public: + /** @defgroup scm-syntaxstatemachine-xfer-member-vars **/ + ///@{ + + /** typeseq for template parameter DRepr **/ + static typeseq s_typeseq; + /** true iff satisfies facet implementation **/ + static bool _valid; + + ///@} + }; + 
+ template + xo::facet::typeseq + ISyntaxStateMachine_Xfer::s_typeseq + = xo::facet::typeseq::id(); + + template + bool + ISyntaxStateMachine_Xfer::_valid + = xo::facet::valid_facet_implementation(); + +} /*namespace scm */ +} /*namespace xo*/ + +/* end ISyntaxStateMachine_Xfer.hpp */ \ No newline at end of file diff --git a/include/xo/reader2/ssm/RSyntaxStateMachine.hpp b/include/xo/reader2/ssm/RSyntaxStateMachine.hpp new file mode 100644 index 00000000..5f920614 --- /dev/null +++ b/include/xo/reader2/ssm/RSyntaxStateMachine.hpp @@ -0,0 +1,85 @@ +/** @file RSyntaxStateMachine.hpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/SyntaxStateMachine.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3. idl for facet methods + * [idl/SyntaxStateMachine.json5] + **/ + +#pragma once + +#include "ASyntaxStateMachine.hpp" + +namespace xo { +namespace scm { + +/** @class RSyntaxStateMachine + **/ +template +class RSyntaxStateMachine : public Object { +private: + using O = Object; + +public: + /** @defgroup scm-syntaxstatemachine-router-type-traits **/ + ///@{ + using ObjectType = Object; + using DataPtr = Object::DataPtr; + using typeseq = xo::reflect::typeseq; + ///@} + + /** @defgroup scm-syntaxstatemachine-router-ctors **/ + ///@{ + RSyntaxStateMachine() {} + RSyntaxStateMachine(Object::DataPtr data) : Object{std::move(data)} {} + RSyntaxStateMachine(const ASyntaxStateMachine * iface, void * data) + requires std::is_same_v + : Object(iface, data) {} + + ///@} + /** @defgroup scm-syntaxstatemachine-router-methods **/ + ///@{ + + // const methods + typeseq _typeseq() const noexcept { return O::iface()->_typeseq(); } + syntaxstatetype ssm_type() const noexcept { + return O::iface()->ssm_type(O::data()); + } + std::string_view get_expect_str() const noexcept { + return O::iface()->get_expect_str(O::data()); 
+ } + + // non-const methods (still const in router!) + void on_if_token(const Token & tk, ParserStateMachine * p_psm) { + return O::iface()->on_if_token(O::data(), tk, p_psm); + } + + ///@} + /** @defgroup scm-syntaxstatemachine-member-vars **/ + ///@{ + + static bool _valid; + + ///@} +}; + +template +bool +RSyntaxStateMachine::_valid = xo::facet::valid_object_router(); + +} /*namespace scm*/ +} /*namespace xo*/ + +namespace xo { namespace facet { + template + struct RoutingFor { + using RoutingType = xo::scm::RSyntaxStateMachine; + }; +} } + +/* end RSyntaxStateMachine.hpp */ \ No newline at end of file diff --git a/include/xo/reader2/syntaxstatetype.hpp b/include/xo/reader2/syntaxstatetype.hpp new file mode 100644 index 00000000..913cd118 --- /dev/null +++ b/include/xo/reader2/syntaxstatetype.hpp @@ -0,0 +1,38 @@ +/** @file syntaxstatetype.hpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include + +namespace xo { + namespace scm { + /** @enum syntaxstatetype + * @brief Label a specialized parsing state machine + * + * Label for a schematika syntax state machine + * dedicated to some particular piece of syntax + **/ + enum class syntaxstatetype { + invalid = -1, + + /** toplevel of some translation unit. 
See @ref DExprSeqState **/ + expect_toplevel_expression_sequence, + + /** comes last, counts number of valid enums **/ + N + }; + + const char * syntaxstatetype_descr(syntaxstatetype x); + + inline std::ostream & + operator<< (std::ostream & os, syntaxstatetype x) { + os << syntaxstatetype_descr(x); + return os; + } + } +} /*namespace xo*/ + +/* end syntaxstatetype.hpp */ diff --git a/src/reader2/CMakeLists.txt b/src/reader2/CMakeLists.txt new file mode 100644 index 00000000..e8bfb1d7 --- /dev/null +++ b/src/reader2/CMakeLists.txt @@ -0,0 +1,30 @@ +# reader2/CMakeLists.txt + +set(SELF_LIB xo_reader2) +set(SELF_SRCS + #init_reader2.cpp + + ParserStateMachine.cpp + ParserStack.cpp + ParserResult.cpp + + ISyntaxStateMachine_Any.cpp + + DExprSeqState.cpp + ISyntaxStateMachine_DExprSeqState.cpp + + #reader2_register_facets.cpp + #reader2_register_types.cpp + ) + +xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) +# note: deps here must also appear in cmake/xo_reader2Config.cmake.in +xo_dependency(${SELF_LIB} xo_gc) +xo_dependency(${SELF_LIB} xo_tokenizer2) +xo_dependency(${SELF_LIB} xo_expression2) +#xo_dependency(${SELF_LIB} reflect) +#xo_dependency(${SELF_LIB} xo_object2) +#xo_dependency(${SELF_LIB} xo_printable2) +#xo_dependency(${SELF_LIB} xo_flatstring) +#xo_dependency(${SELF_LIB} subsys) +#xo_dependency(${SELF_LIB} indentlog) diff --git a/src/reader2/DExprSeqState.cpp b/src/reader2/DExprSeqState.cpp new file mode 100644 index 00000000..7d2368e5 --- /dev/null +++ b/src/reader2/DExprSeqState.cpp @@ -0,0 +1,101 @@ +/** @file DExprSeqState.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "DExprSeqState.hpp" +#include "ssm/ISyntaxStateMachine_DExprSeqState.hpp" + +namespace xo { + using xo::mm::AAllocator; + using xo::facet::with_facet; + using xo::reflect::typeseq; + + namespace scm { + DExprSeqState::DExprSeqState(exprseqtype ty) : seqtype_{ty} + {} + + namespace { + obj + make_exprseq_ssm(obj mm, + 
exprseqtype seqtype) + { + void * mem = mm.alloc(typeseq::id(), + sizeof(DExprSeqState)); + + DExprSeqState * ssm = new (mem) DExprSeqState(seqtype); + + return with_facet::mkobj(ssm); + } + } + + void + DExprSeqState::start_interactive(obj mm, + ParserStateMachine * p_psm) + { + + + p_psm->push_ssm(make_exprseq_ssm(mm, + exprseqtype::toplevel_interactive)); + } + + void + DExprSeqState::start_batch(obj mm, + ParserStateMachine * p_psm) + { + (void)mm; + (void)p_psm; +#ifdef NOT_YET + p_psm->push_ssm(make_exprseq_ssm(mm, + exprseqtype::toplevel_batch)); +#endif + } + + // SyntaxStateMachine facet methods + + syntaxstatetype + DExprSeqState::ssm_type() const noexcept + { + return syntaxstatetype::expect_toplevel_expression_sequence; + } + + std::string_view + DExprSeqState::get_expect_str() const noexcept + { + // TODO: provisional. Will expand as more syntax implemented + + switch (seqtype_) { + case exprseqtype::toplevel_interactive: + return "def|expression|..."; + case exprseqtype::toplevel_batch: + return "def|..."; + case exprseqtype::N: + break; + } + + assert(false); + return "impossible-DExprSeqState::get_expect_str"; + } + + void + DExprSeqState::on_if_token(const Token & tk, + ParserStateMachine * p_psm) + { + switch (seqtype_) { + case exprseqtype::toplevel_interactive: + assert(false); // DfElseState::start(p_psm); + break; + case exprseqtype::toplevel_batch: + p_psm->illegal_input_on_token("DExprSeqState::on_if_token", + tk, + this->get_expect_str()); + break; + case exprseqtype::N: + assert(false); // unreachable + break; + } + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end DExprSeqState.cpp */ diff --git a/src/reader2/ISyntaxStateMachine_Any.cpp b/src/reader2/ISyntaxStateMachine_Any.cpp new file mode 100644 index 00000000..8c4c0b42 --- /dev/null +++ b/src/reader2/ISyntaxStateMachine_Any.cpp @@ -0,0 +1,47 @@ +/** @file ISyntaxStateMachine_Any.cpp + * + **/ + +#include "ssm/ISyntaxStateMachine_Any.hpp" +#include + +namespace xo { +namespace scm {
+ +using xo::facet::DVariantPlaceholder; +using xo::facet::typeseq; +using xo::facet::valid_facet_implementation; + +void +ISyntaxStateMachine_Any::_fatal() +{ + /* control here on uninitialized ISyntaxStateMachine_Any. + * Initialized instance will have specific implementation type + */ + std::cerr << "fatal" + << ": attempt to call uninitialized" + << " ISyntaxStateMachine_Any method" + << std::endl; + std::terminate(); +} + +typeseq +ISyntaxStateMachine_Any::s_typeseq = typeseq::id(); + +bool +ISyntaxStateMachine_Any::_valid + = valid_facet_implementation(); + +// nonconst methods + +auto +ISyntaxStateMachine_Any::on_if_token(Opaque, const Token &, ParserStateMachine *) -> void +{ + _fatal(); +} + + +} /*namespace scm*/ +} /*namespace xo*/ + +/* end ISyntaxStateMachine_Any.cpp */ \ No newline at end of file diff --git a/src/reader2/ISyntaxStateMachine_DExprSeqState.cpp b/src/reader2/ISyntaxStateMachine_DExprSeqState.cpp new file mode 100644 index 00000000..87f4f2d8 --- /dev/null +++ b/src/reader2/ISyntaxStateMachine_DExprSeqState.cpp @@ -0,0 +1,39 @@ +/** @file ISyntaxStateMachine_DExprSeqState.cpp + * + * Generated automagically from ingredients: + * 1. code generator: + * [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet] + * arguments: + * --input [idl/ISyntaxStateMachine_DExprSeqState.json5] + * 2. jinja2 template for abstract facet .hpp file: + * [iface_facet_any.hpp.j2] + * 3.
idl for facet methods + * [idl/ISyntaxStateMachine_DExprSeqState.json5] +**/ + +#include "ssm/ISyntaxStateMachine_DExprSeqState.hpp" + +namespace xo { + namespace scm { + auto + ISyntaxStateMachine_DExprSeqState::ssm_type(const DExprSeqState & self) noexcept -> syntaxstatetype + { + return self.ssm_type(); + } + + auto + ISyntaxStateMachine_DExprSeqState::get_expect_str(const DExprSeqState & self) noexcept -> std::string_view + { + return self.get_expect_str(); + } + + auto + ISyntaxStateMachine_DExprSeqState::on_if_token(DExprSeqState & self, const Token & tk, ParserStateMachine * p_psm) -> void + { + self.on_if_token(tk, p_psm); + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ISyntaxStateMachine_DExprSeqState.cpp */ \ No newline at end of file diff --git a/src/reader2/ParserResult.cpp b/src/reader2/ParserResult.cpp new file mode 100644 index 00000000..28553228 --- /dev/null +++ b/src/reader2/ParserResult.cpp @@ -0,0 +1,32 @@ +/** @file ParserResult.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "ParserResult.hpp" + +namespace xo { + namespace scm { + ParserResult::ParserResult(parser_result_type type, + obj expr, + std::string_view error_src_fn, + const DString * error_description) + : result_type_{type}, + result_expr_{expr}, + error_src_fn_{error_src_fn}, + error_description_{error_description} + {} + + ParserResult + ParserResult::error(std::string_view ssm_name, + const DString * errmsg) + { + return ParserResult(parser_result_type::error, + obj(), + ssm_name, + errmsg); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserResult.cpp */ diff --git a/src/reader2/ParserStack.cpp b/src/reader2/ParserStack.cpp new file mode 100644 index 00000000..7e8f6b1b --- /dev/null +++ b/src/reader2/ParserStack.cpp @@ -0,0 +1,28 @@ +/** @file ParserStack.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "ParserStack.hpp" +#include "SyntaxStateMachine.hpp" + +namespace xo { + using xo::facet::typeseq; + + namespace scm { + + 
ParserStack * + ParserStack::push(obj mm, + obj ssm) + + { + void * mem = mm.alloc(typeseq::id(), + sizeof(ParserStack)); + + return new (mem) ParserStack(ssm, this); /* new frame sits on top of this frame; passing parent_ would unlink this frame */ + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserStack.cpp */ diff --git a/src/reader2/ParserStateMachine.cpp b/src/reader2/ParserStateMachine.cpp new file mode 100644 index 00000000..a691bb57 --- /dev/null +++ b/src/reader2/ParserStateMachine.cpp @@ -0,0 +1,134 @@ +/** @file ParserStateMachine.cpp +* + * @author Roland Conybeare, Jan 2026 + **/ + +#include "ParserStateMachine.hpp" +#include "ParserStack.hpp" +#include "SyntaxStateMachine.hpp" +#include +#include +#include +#include +#include + +namespace xo { + using xo::facet::with_facet; + + namespace scm { + void + ParserStateMachine::push_ssm(obj ssm) + { + scope log(XO_DEBUG(debug_flag_)); + + // note: using parser_alloc_ for parser stack, since stacklike behavior + + auto alloc = with_facet::mkobj(&parser_alloc_); + + this->stack_ = stack_->push(alloc, ssm); + } + + void + ParserStateMachine::on_token(const Token & tk) + { + scope log(XO_DEBUG(debug_flag_), xtag("tk", tk)); + + if (!stack_) { + // parsing stack should always have toplevel expression sequence + throw std::runtime_error(tostr("unexpected empty parsing stack", + xtag("token", tk), + xtag("help", "do it the same.
but better!") + )); + } + + switch (tk.tk_type()) { + case tokentype::tk_if: + this->on_if_token(tk); + break; + + + // all the not-yet handled cases + case tokentype::tk_invalid: + case tokentype::tk_bool: + case tokentype::tk_i64: + case tokentype::tk_f64: + case tokentype::tk_string: + case tokentype::tk_symbol: + case tokentype::tk_leftparen: + case tokentype::tk_rightparen: + case tokentype::tk_leftbracket: + case tokentype::tk_rightbracket: + case tokentype::tk_leftbrace: + case tokentype::tk_rightbrace: + case tokentype::tk_leftangle: + case tokentype::tk_rightangle: + case tokentype::tk_lessequal: + case tokentype::tk_greatequal: + case tokentype::tk_dot: + case tokentype::tk_comma: + case tokentype::tk_colon: + case tokentype::tk_doublecolon: + case tokentype::tk_semicolon: + case tokentype::tk_singleassign: + case tokentype::tk_assign: + case tokentype::tk_yields: + case tokentype::tk_plus: + case tokentype::tk_minus: + case tokentype::tk_star: + case tokentype::tk_slash: + case tokentype::tk_cmpeq: + case tokentype::tk_cmpne: + case tokentype::tk_type: + case tokentype::tk_def: + case tokentype::tk_lambda: + case tokentype::tk_then: + case tokentype::tk_else: + case tokentype::tk_let: + case tokentype::tk_in: + case tokentype::tk_end: + case tokentype::N: + throw std::runtime_error(tostr("NOT IMPLEMENTED", + xtag("token", tk))); + + } + } + + void + ParserStateMachine::on_if_token(const Token & tk) + { + scope log(XO_DEBUG(debug_flag_), xtag("tk", tk)); + + stack_->top().on_if_token(tk, this); + } + + void + ParserStateMachine::capture_error(std::string_view ssm_name, + const DString * errmsg) + { + this->result_ = ParserResult::error(ssm_name, errmsg); + } + + void + ParserStateMachine::illegal_input_on_token(std::string_view ssm_name, + const Token & tk, + std::string_view expect_str) + { + // TODO: + // - want to write error message using DArena + // - need something like log_streambuf and/or tostr() that's arena-aware + + auto errmsg_string = 
tostr("Unexpected token for parsing state", + xtag("token", tk), + xtag("expecting", expect_str), + xtag("ssm", ssm_name), + xtag("via", "ParserStateMachine::illegal_input_on_token")); + + auto errmsg = DString::from_view(expr_alloc_, + std::string_view(errmsg_string)); + + this->capture_error(ssm_name, errmsg); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end ParserStateMachine.cpp */