xo-reader2 scaffold (fomo+arena version of xo-reader/) [WIP]

This commit is contained in:
Roland Conybeare 2026-01-18 17:59:46 -05:00
commit 7ee57309b5
25 changed files with 1378 additions and 11 deletions

View file

@ -0,0 +1,76 @@
/** @file DExprSeqState.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "ParserStateMachine.hpp"
#include "SyntaxStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/facet/obj.hpp>
namespace xo {
namespace scm {
/** Kinds of top-level expression sequence **/
enum class exprseqtype {
    /** Top-level sequence for an interactive session.
     *  Allows: rvalue expressions.
     **/
    toplevel_interactive,

    /** Top-level sequence for a non-interactive (batch) session.
     *  NOTE(review): the original comment left the list of allowed
     *  forms blank -- fill in once settled.
     **/
    toplevel_batch,

    /** Not a sequence type. Comes last, so its value counts
     *  the number of valid enums.
     **/
    N
};
/** @class DExprSeqState
 *  @brief State machine for parsing a sequence of expressions
 *
 *  Similar to exprseq_xs in xo-expression
 **/
class DExprSeqState {
public:
    using AAllocator = xo::mm::AAllocator;

    /** construct for sequence type @p ty **/
    explicit DExprSeqState(exprseqtype ty);

    /** start interactive top-level session.
     *  NOTE(review): presumably creates a DExprSeqState using memory
     *  from @p mm and installs it on @p p_psm -- confirm against
     *  the implementation.
     **/
    static void start_interactive(obj<AAllocator> mm,
                                  ParserStateMachine * p_psm);

    /** start non-interactive top-level session.
     *  Takes the same arguments as @ref start_interactive.
     **/
    static void start_batch(obj<AAllocator> mm,
                            ParserStateMachine * p_psm);

    /** @defgroup scm-exprseq-ssm-facet syntaxstatemachine facet methods **/
    ///@{

    /** identifies the ssm implemented here **/
    syntaxstatetype ssm_type() const noexcept;

    /** text describing expected/allowed input to this ssm in current state.
     *  Intended to drive error messages
     **/
    std::string_view get_expect_str() const noexcept;

    /** update state for this syntax on incoming token @p tk,
     *  with overall parser state in @p p_psm
     **/
    void on_if_token(const Token & tk, ParserStateMachine * p_psm);

    ///@}

private:
    /** sequence type.
     *  Rvalue expressions are accepted when this is toplevel_interactive.
     *  Definitions and declarations are always accepted.
     **/
    exprseqtype seqtype_;
};
} /*namespace scm*/
} /*namespace xo*/
/* end DExprSeqState.hpp */

View file

@ -0,0 +1,80 @@
/** @file ExpressionParser.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "ExprState.hpp"
#include <xo/alloc2/Allocator.hpp>
#include <xo/alloc2/arena/IAllocator_DArena.hpp>
#include <xo/facet/obj.hpp>
namespace xo {
namespace scm {
/** @class ExpressionParser
* @brief Assemble Schematika expressions from token sequences
*
* Parser represents Each partially assembled expression by
* an ExprState object.
* Expreesions form a tree:
* each expression belongs to at most one parent.
*
**/
class ExpressionParser {
public:
void push_exprstate(obj<AExprState> xstate);
private:
/* TODO:
* ASymbolTable
* DLocalSymtab
* DGlobalSymtab
*
* Will also need
* DVariable
* DLambda
*
* For DGlobalSymtab perhaps use DArenaHashMap.
* May also want to use DArenaHashMap+DArena to intern strings
*
* Also:
* TypeUnifier
*/
/** Arena for internal parsing stack.
* Must be owned exclusively because destructively
* modified as parser completes parsing of each sub-expression
*
* Contents will be a stack of ExprState instances
**/
DArena parser_alloc_;
#ifdef NOT_YET
/** Arena for internal environment stack.
* This represents just nesting for environments.
* Details for each frame survive parsing and are
* stored in @ref expr_alloc_.
* Maybe that means we don't need env_alloc_
**/
DArena env_alloc_;
#endif
/** Allocator for parsed expressions.
* Information available during subsequent execution
* (whether compiling or interpreting) must be stored here.
*
* Also use this allocator for error messages arising
* during parsing
*
* Memory use patterns for executions are not predictable,
* and require garbage collection, e.g. DX1Collector.
*
* May alternatively be able to use DArena in a compile-only
* scenario, where top-level Expressions can be discarded
* once compiled.
**/
obj<AAllocator> expr_alloc_;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ExpressionParser.hpp */

View file

@ -0,0 +1,52 @@
/** @file ParserResult.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include <xo/expression2/Expression.hpp>
#include <xo/object2/DString.hpp>
#include <string_view>
namespace xo {
namespace scm {
/** Kinds of output a parser can report **/
enum class parser_result_type {
    /** nothing to report yet: no input, or expression still incomplete **/
    none,

    /** a complete expression is being emitted **/
    expression,

    /** a parsing error is being emitted **/
    error,

    /** not a result type; comes last, so counts the valid enums **/
    N
};
/** @class ParserResult
 *  @brief Output of the parser: nothing yet, an expression, or an error
 *
 *  Holds non-owning views (std::string_view, const DString *);
 *  whatever storage they refer to must outlive this object.
 **/
class ParserResult {
public:
    /** default result: type @c none, null error description **/
    ParserResult() = default;

    /** construct with every field supplied explicitly:
     *  result type @p type, expression @p expr,
     *  error origin @p error_src_fn, error text @p error_description
     **/
    ParserResult(parser_result_type type,
                 obj<AExpression> expr,
                 std::string_view error_src_fn,
                 const DString * error_description);

    /** create ParserResult for a parsing error.
     *  Reporting detailed message @p errmsg
     *  from syntax state machine @p ssm
     **/
    static ParserResult error(std::string_view ssm,
                              const DString * errmsg);

    /** kind of result held **/
    parser_result_type result_type() const { return result_type_; }
    /** expression held (as stored; meaningful when type is @c expression) **/
    obj<AExpression> result_expr() const { return result_expr_; }
    /** origin of the error, as passed at construction.
     *  Previously stored but not readable from outside the class.
     **/
    std::string_view error_src_fn() const { return error_src_fn_; }
    /** error text; null unless one was supplied **/
    const DString * error_description() const { return error_description_; }

private:
    /** which kind of result this is **/
    parser_result_type result_type_ = parser_result_type::none;
    /** emitted expression **/
    obj<AExpression> result_expr_;
    /** where the error was detected.
     *  NOTE(review): presumably the reporting syntax state machine's
     *  name (cf. @p ssm argument of @ref error) -- confirm.
     **/
    std::string_view error_src_fn_;
    /** detailed error message; not owned **/
    const DString * error_description_ = nullptr;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserResult.hpp */

View file

@ -0,0 +1,47 @@
/** @file ParserStack.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "SyntaxStateMachine.hpp"
#include <xo/alloc2/Allocator.hpp>
#include <xo/facet/obj.hpp>
namespace xo {
namespace scm {
/** @brief A stack of expression state machines
 *
 *  Each state machine is dedicated to a particular syntax instance.
 *  The innermost machine is held by this frame (see @ref top);
 *  machines for surrounding expressions are in progressively removed
 *  frames reached via parent links (see @ref parent).
 **/
class ParserStack {
public:
    using AAllocator = xo::mm::AAllocator;

    /** frame for syntax @p ssm with remainder of stack @p parent **/
    ParserStack(obj<ASyntaxStateMachine> ssm, ParserStack * parent);

    /** create new top of stack for syntax @p ssm, using memory from @p mm.
     *  NOTE(review): no parent argument is taken; presumably the
     *  receiver becomes the new frame's parent -- confirm against
     *  the implementation.
     **/
    ParserStack * push(obj<AAllocator> mm,
                       obj<ASyntaxStateMachine> ssm);

    /** state machine at top of stack **/
    obj<ASyntaxStateMachine> top() const noexcept { return ssm_; }

    /** stack with the top frame removed **/
    ParserStack * parent() const noexcept { return parent_; }

private:
    /** top of parsing stack: always non-null **/
    obj<ASyntaxStateMachine> ssm_;

    /** remainder of parsing stack excluding top **/
    ParserStack * parent_ = nullptr;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStack.hpp */

View file

@ -0,0 +1,115 @@
/** @file ParserStateMachine.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "ParserResult.hpp"
#include <xo/tokenizer2/Token.hpp>
#include <xo/alloc2/Allocator.hpp>
#include <xo/arena/DArena.hpp>
namespace xo {
namespace scm {
// ASyntaxStateMachine is defined in ssm/ASyntaxStateMachine.hpp.
// That header includes this one, so including it here
// would create an include cycle; forward-declare instead.
//
class ASyntaxStateMachine;
// note: forward-declaring ParserStack (rather than including its header)
// is load-bearing, because ASyntaxStateMachine.hpp includes
// ParserStateMachine.hpp before obj<ASyntaxStateMachine> is defined.
class ParserStack;
/** @brief State machine embodying Schematika parser
 **/
class ParserStateMachine {
public:
    using AAllocator = xo::mm::AAllocator;
    using ArenaConfig = xo::mm::ArenaConfig;
    using DArena = xo::mm::DArena;

    /** construct from arena configuration @p config.
     *  NOTE(review): single-argument constructor is not explicit,
     *  so an ArenaConfig converts implicitly -- confirm that is intended.
     **/
    ParserStateMachine(const ArenaConfig & config);

    /** @defgroup scm-parserstatemachine-bookkeeping bookkeeping methods **/
    ///@{

    /** push syntax @p ssm onto @ref stack_ **/
    void push_ssm(obj<ASyntaxStateMachine> ssm);

    ///@}
    /** @defgroup scm-parserstatemachine-inputmethods input methods **/
    ///@{

    /** update state to respond to input token @p tk.
     *  record output (if any) in @ref result_
     **/
    void on_token(const Token & tk);

    /** update state for incoming if-token @p tk **/
    void on_if_token(const Token & tk);

    ///@}
    /** @defgroup scm-parserstatemachine-error-entrypoints error entry points **/
    ///@{

    /** capture error message @p errmsg from @p ssm_name,
     *  as current state machine output.
     *
     *  @p errmsg will have been allocated from the
     *  @ref expr_alloc_ allocator
     **/
    void capture_error(std::string_view ssm_name,
                       const DString * errmsg);

    /** report illegal input from syntax state machine @p ssm_name
     *  recognized on input token @p tk. @p expect_str describes
     *  expected input in that state
     **/
    void illegal_input_on_token(std::string_view ssm_name,
                                const Token & tk,
                                std::string_view expect_str);

    ///@}

private:
    /** Arena for internal parsing stack (see @ref stack_).
     *  Must be owned exclusively because destructively
     *  modified as parser completes parsing of each sub-expression
     **/
    DArena parser_alloc_;

    /** parser stack. Memory from @ref parser_alloc_ **/
    ParserStack * stack_ = nullptr;

    /** Allocator for parsed expressions.
     *  Information available during subsequent execution
     *  (whether compiling or interpreting) must be stored here.
     *
     *  Also use this allocator for error messages arising
     *  during parsing
     *
     *  Memory use patterns for executions are not predictable,
     *  and benefit from garbage collection, e.g. DX1Collector.
     *
     *  May alternatively be able to use DArena in a compile-only
     *  scenario, where top-level Expressions can be discarded
     *  once compiled.
     **/
    obj<AAllocator> expr_alloc_;

    /** current output from parser **/
    ParserResult result_;

    /** true to enable debug output **/
    bool debug_flag_ = false;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStateMachine.hpp */

View file

@ -0,0 +1,25 @@
/** @file Reader.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "ExpressionParser.hpp"
#include <xo/tokenizer2/Tokenizer.hpp>
namespace xo {
namespace scm {
/** @class Reader
 *  @brief Assemble Schematika expressions from lexical tokens
 *
 *  Pairs a tokenizer with an expression parser.
 *  No public interface is declared yet.
 **/
class Reader {
private:
    /** tokenizer: assembles Schematika tokens from text **/
    Tokenizer tokenizer_;

    /** parser: assemble Schematika expressions from token sequences **/
    ExpressionParser parser_;
};
} /*namespace scm*/
} /*namespace xo*/
/* end Reader.hpp */

View file

@ -0,0 +1,22 @@
/** @file SyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for facet .hpp file:
* [facet.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ssm/ASyntaxStateMachine.hpp"
#include "ssm/ISyntaxStateMachine_Any.hpp"
#include "ssm/ISyntaxStateMachine_Xfer.hpp"
#include "ssm/RSyntaxStateMachine.hpp"
/* end SyntaxStateMachine.hpp */

View file

@ -0,0 +1,78 @@
/** @file ASyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [abstract_facet.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
// includes (via {facet_includes})
#include "ParserStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/tokenizer2/Token.hpp>
#include <xo/facet/obj.hpp>
#include <xo/facet/facet_implementation.hpp>
#include <xo/facet/typeseq.hpp>
// {pretex} here
namespace xo {
namespace scm {
/** type-erased pointer to read-only state **/
using Copaque = const void *;
/** type-erased pointer to mutable state **/
using Opaque = void *;

/**
 *  Assistant to schematika parser dedicated to particular syntax.
 *
 *  Abstract interface: every method except _typeseq() receives the
 *  state it operates on as a type-erased pointer.
 *
 *  NOTE(review): no virtual destructor is declared; presumably
 *  instances are never destroyed through this interface -- confirm.
 **/
class ASyntaxStateMachine {
public:
    /** @defgroup scm-syntaxstatemachine-type-traits **/
    ///@{

    // types

    /** integer identifying a type **/
    using typeseq = xo::facet::typeseq;
    using Copaque = const void *;
    using Opaque = void *;

    ///@}
    /** @defgroup scm-syntaxstatemachine-methods **/
    ///@{

    // const methods

    /** RTTI: unique id# for actual runtime data representation **/
    virtual typeseq _typeseq() const noexcept = 0;

    /** identify a type of syntax state machine **/
    virtual syntaxstatetype ssm_type(Copaque data) const noexcept = 0;

    /** text describing expected/allowed input to this ssm in current state **/
    virtual std::string_view get_expect_str(Copaque data) const noexcept = 0;

    // nonconst methods

    /** update state machine for incoming if-keyword-token @p tk **/
    virtual void on_if_token(Opaque data,
                             const Token & tk,
                             ParserStateMachine * p_psm) = 0;

    ///@}
}; /*ASyntaxStateMachine*/
/** Implementation ISyntaxStateMachine_DRepr of ASyntaxStateMachine for state DRepr
* should provide a specialization:
*
* template <>
* struct xo::facet::FacetImplementation<ASyntaxStateMachine, DRepr> {
* using ImplType = ISyntaxStateMachine_DRepr;
* };
*
* then ISyntaxStateMachine_ImplType<DRepr> --> ISyntaxStateMachine_DRepr
*
* (the generated specialization for DExprSeqState names
* ISyntaxStateMachine_Xfer<DRepr, ISyntaxStateMachine_DRepr> as its ImplType)
**/
template <typename DRepr>
using ISyntaxStateMachine_ImplType = xo::facet::FacetImplType<ASyntaxStateMachine, DRepr>;
} /*namespace scm*/
} /*namespace xo*/
/* ASyntaxStateMachine.hpp */

View file

@ -0,0 +1,87 @@
/** @file ISyntaxStateMachine_Any.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ASyntaxStateMachine.hpp"
#include <xo/facet/obj.hpp>
// forward declaration: the specialization below names this class
// before its definition appears
namespace xo { namespace scm { class ISyntaxStateMachine_Any; } }
namespace xo {
namespace facet {
/** Names ISyntaxStateMachine_Any as the ASyntaxStateMachine
* implementation for the DVariantPlaceholder representation
**/
template <>
struct FacetImplementation<xo::scm::ASyntaxStateMachine,
DVariantPlaceholder>
{
using ImplType = xo::scm::ISyntaxStateMachine_Any;
};
}
}
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_Any
 *  @brief ASyntaxStateMachine implementation for empty variant instance
 *
 *  Apart from _typeseq(), the facet methods defined inline here
 *  call _fatal() and do not return.
 **/
class ISyntaxStateMachine_Any : public ASyntaxStateMachine {
public:
    /** @defgroup scm-syntaxstatemachine-any-type-traits **/
    ///@{

    /** integer identifying a type **/
    using typeseq = xo::facet::typeseq;

    ///@}
    /** @defgroup scm-syntaxstatemachine-any-methods **/
    ///@{

    /** this instance, laundered, as a pointer to the abstract interface **/
    const ASyntaxStateMachine * iface() const {
        const ASyntaxStateMachine * self = std::launder(this);
        return self;
    }

    // from ASyntaxStateMachine

    // const methods

    /** reports @ref s_typeseq **/
    typeseq _typeseq() const noexcept override { return s_typeseq; }

    /** never returns: delegates to _fatal() **/
    [[noreturn]] syntaxstatetype ssm_type(Copaque) const noexcept override {
        _fatal();
    }

    /** never returns: delegates to _fatal() **/
    [[noreturn]] std::string_view get_expect_str(Copaque) const noexcept override {
        _fatal();
    }

    // nonconst methods

    /** declared noreturn; defined out of line **/
    [[noreturn]] void on_if_token(Opaque, const Token &, ParserStateMachine *) override;

    ///@}

private:
    /** @defgroup scm-syntaxstatemachine-any-private-methods **/
    ///@{

    /** common exit for facet methods that must not be called; never returns **/
    [[noreturn]] static void _fatal();

    ///@}

public:
    /** @defgroup scm-syntaxstatemachine-any-member-vars **/
    ///@{

    /** value reported by _typeseq() **/
    static typeseq s_typeseq;
    static bool _valid;

    ///@}
};
} /*namespace scm */
} /*namespace xo */
/* ISyntaxStateMachine_Any.hpp */

View file

@ -0,0 +1,64 @@
/** @file ISyntaxStateMachine_DExprSeqState.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/ISyntaxStateMachine_DExprSeqState.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_repr.hpp.j2]
* 3. idl for facet methods
* [idl/ISyntaxStateMachine_DExprSeqState.json5]
**/
#pragma once
#include "SyntaxStateMachine.hpp"
#include "SyntaxStateMachine.hpp"
#include "ssm/ISyntaxStateMachine_Xfer.hpp"
#include "DExprSeqState.hpp"
// forward declaration: the specialization below names this class
// before its definition appears
namespace xo { namespace scm { class ISyntaxStateMachine_DExprSeqState; } }
namespace xo {
namespace facet {
/** Names the ASyntaxStateMachine implementation for DExprSeqState:
* ISyntaxStateMachine_Xfer instantiated on DExprSeqState and
* ISyntaxStateMachine_DExprSeqState
**/
template <>
struct FacetImplementation<xo::scm::ASyntaxStateMachine,
xo::scm::DExprSeqState>
{
using ImplType = xo::scm::ISyntaxStateMachine_Xfer
<xo::scm::DExprSeqState,
xo::scm::ISyntaxStateMachine_DExprSeqState>;
};
}
}
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_DExprSeqState
 *  @brief Static ASyntaxStateMachine facet methods for DExprSeqState
 *
 *  Each method takes the DExprSeqState instance it operates on
 *  as its first argument @p self.
 **/
class ISyntaxStateMachine_DExprSeqState {
public:
    /** @defgroup scm-syntaxstatemachine-dexprseqstate-type-traits **/
    ///@{

    using Copaque = xo::scm::ASyntaxStateMachine::Copaque;
    using Opaque = xo::scm::ASyntaxStateMachine::Opaque;

    ///@}
    /** @defgroup scm-syntaxstatemachine-dexprseqstate-methods **/
    ///@{

    // const methods

    /** identify a type of syntax state machine **/
    static syntaxstatetype ssm_type(const DExprSeqState & self) noexcept;

    /** text describing expected/allowed input to this ssm in current state **/
    static std::string_view get_expect_str(const DExprSeqState & self) noexcept;

    // non-const methods

    /** update state machine for incoming if-keyword-token @p tk **/
    static void on_if_token(DExprSeqState & self,
                            const Token & tk,
                            ParserStateMachine * p_psm);

    ///@}
};
} /*namespace scm*/
} /*namespace xo*/
/* end */

View file

@ -0,0 +1,88 @@
/** @file ISyntaxStateMachine_Xfer.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ParserStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/tokenizer2/Token.hpp>
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_Xfer
 *  @brief Adapts type-erased ASyntaxStateMachine calls to typed calls on DRepr
 *
 *  Each virtual method casts its opaque data pointer to DRepr and
 *  forwards to the static method of the same name on
 *  @p ISyntaxStateMachine_DRepr.
 *
 *  @tparam DRepr state representation the opaque pointers refer to
 *  @tparam ISyntaxStateMachine_DRepr provides the typed static methods
 **/
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
class ISyntaxStateMachine_Xfer : public ASyntaxStateMachine {
public:
    /** @defgroup scm-syntaxstatemachine-xfer-type-traits **/
    ///@{

    /** actual implementation (not generated; often delegates to DRepr) **/
    using Impl = ISyntaxStateMachine_DRepr;
    /** integer identifying a type **/
    using typeseq = ASyntaxStateMachine::typeseq;

    ///@}
    /** @defgroup scm-syntaxstatemachine-xfer-methods **/
    ///@{

    /** recover read-only DRepr from opaque pointer @p d.
     *  Caller must pass a pointer that really addresses a DRepr.
     **/
    static const DRepr & _dcast(Copaque d) {
        return *static_cast<const DRepr *>(d);
    }

    /** recover mutable DRepr from opaque pointer @p d.
     *  Caller must pass a pointer that really addresses a DRepr.
     **/
    static DRepr & _dcast(Opaque d) {
        return *static_cast<DRepr *>(d);
    }

    // from ASyntaxStateMachine

    // const methods

    /** reports @ref s_typeseq **/
    typeseq _typeseq() const noexcept override { return s_typeseq; }

    /** forwards to Impl::ssm_type on the DRepr behind @p data **/
    syntaxstatetype ssm_type(Copaque data) const noexcept override {
        return I::ssm_type(_dcast(data));
    }

    /** forwards to Impl::get_expect_str on the DRepr behind @p data **/
    std::string_view get_expect_str(Copaque data) const noexcept override {
        return I::get_expect_str(_dcast(data));
    }

    // non-const methods

    /** forwards to Impl::on_if_token on the DRepr behind @p data **/
    void on_if_token(Opaque data, const Token & tk, ParserStateMachine * p_psm) override {
        return I::on_if_token(_dcast(data), tk, p_psm);
    }

    ///@}

private:
    /** shorthand for @ref Impl **/
    using I = Impl;

public:
    /** @defgroup scm-syntaxstatemachine-xfer-member-vars **/
    ///@{

    /** typeseq for template parameter DRepr **/
    static typeseq s_typeseq;
    /** true iff satisfies facet implementation **/
    static bool _valid;

    ///@}
};
/** definition of s_typeseq: initialized from xo::facet::typeseq::id<DRepr>() **/
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
xo::facet::typeseq
ISyntaxStateMachine_Xfer<DRepr, ISyntaxStateMachine_DRepr>::s_typeseq
= xo::facet::typeseq::id<DRepr>();
/** definition of _valid: initialized from
* xo::facet::valid_facet_implementation<> applied to
* ASyntaxStateMachine and this ISyntaxStateMachine_Xfer instantiation
**/
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
bool
ISyntaxStateMachine_Xfer<DRepr, ISyntaxStateMachine_DRepr>::_valid
= xo::facet::valid_facet_implementation<ASyntaxStateMachine,
ISyntaxStateMachine_Xfer>();
} /*namespace scm */
} /*namespace xo*/
/* end ISyntaxStateMachine_Xfer.hpp */

View file

@ -0,0 +1,85 @@
/** @file RSyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ASyntaxStateMachine.hpp"
namespace xo {
namespace scm {
/** @class RSyntaxStateMachine
 *  @brief Router giving @p Object the ASyntaxStateMachine method set
 *
 *  Derives from @p Object and forwards each facet method to
 *  Object::iface(), supplying Object::data() as the state argument.
 *
 *  @tparam Object base type; must provide DataPtr, DataType,
 *          iface() and data()
 **/
template <typename Object>
class RSyntaxStateMachine : public Object {
private:
    using O = Object;

public:
    /** @defgroup scm-syntaxstatemachine-router-type-traits **/
    ///@{

    using ObjectType = Object;
    /* 'typename' is spelled out: DataPtr is a dependent name, and
     * compilers that do not implement P0634 reject it otherwise.
     */
    using DataPtr = typename Object::DataPtr;
    /* NOTE(review): ASyntaxStateMachine spells its typeseq as
     * xo::facet::typeseq; presumably the same type -- confirm.
     */
    using typeseq = xo::reflect::typeseq;

    ///@}
    /** @defgroup scm-syntaxstatemachine-router-ctors **/
    ///@{

    /** default-constructs the Object base **/
    RSyntaxStateMachine() {}

    /** constructs the Object base from @p data **/
    RSyntaxStateMachine(DataPtr data) : Object{std::move(data)} {}

    /** constructs the Object base from interface @p iface and opaque @p data.
     *  Available only when Object::DataType is DVariantPlaceholder.
     **/
    RSyntaxStateMachine(const ASyntaxStateMachine * iface, void * data)
        requires std::is_same_v<typename Object::DataType, xo::facet::DVariantPlaceholder>
        : Object(iface, data) {}

    ///@}
    /** @defgroup scm-syntaxstatemachine-router-methods **/
    ///@{

    // const methods

    /** forwards to iface()->_typeseq() **/
    typeseq _typeseq() const noexcept { return O::iface()->_typeseq(); }

    /** forwards to iface()->ssm_type() on data() **/
    syntaxstatetype ssm_type() const noexcept {
        return O::iface()->ssm_type(O::data());
    }

    /** forwards to iface()->get_expect_str() on data() **/
    std::string_view get_expect_str() const noexcept {
        return O::iface()->get_expect_str(O::data());
    }

    // non-const methods (still const in router!)
    // NOTE(review): despite the remark above, on_if_token is not
    // const-qualified here -- confirm which is intended.

    /** forwards to iface()->on_if_token() on data() **/
    void on_if_token(const Token & tk, ParserStateMachine * p_psm) {
        return O::iface()->on_if_token(O::data(), tk, p_psm);
    }

    ///@}
    /** @defgroup scm-syntaxstatemachine-member-vars **/
    ///@{

    static bool _valid;

    ///@}
};
/** definition of _valid: initialized from xo::facet::valid_object_router<Object>() **/
template <typename Object>
bool
RSyntaxStateMachine<Object>::_valid = xo::facet::valid_object_router<Object>();
} /*namespace scm*/
} /*namespace xo*/
namespace xo { namespace facet {
/** Names RSyntaxStateMachine<Object> as the routing type
* for facet ASyntaxStateMachine, for any Object
**/
template <typename Object>
struct RoutingFor<xo::scm::ASyntaxStateMachine, Object> {
using RoutingType = xo::scm::RSyntaxStateMachine<Object>;
};
} }
/* end RSyntaxStateMachine.hpp */

View file

@ -0,0 +1,38 @@
/** @file syntaxstatetype.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include <ostream>
namespace xo {
namespace scm {
/** @enum syntaxstatetype
 *  @brief Label a specialized parsing state machine
 *
 *  Label for a schematika syntax state machine
 *  dedicated to some particular piece of syntax
 **/
enum class syntaxstatetype {
    /** sentinel; not a valid state machine type **/
    invalid = -1,

    /** toplevel of some translation unit. See @ref DExprSeqState **/
    expect_toplevel_expression_sequence,

    /** comes last, counts number of valid enums **/
    N
};
/** text description for @p x; defined out of line **/
const char * syntaxstatetype_descr(syntaxstatetype x);

/** write the description of @p x (per syntaxstatetype_descr) to @p os.
 *  Returns @p os to permit chaining.
 **/
inline std::ostream &
operator<< (std::ostream & os, syntaxstatetype x)
{
    return os << syntaxstatetype_descr(x);
}
}
} /*namespace xo*/
/* end syntaxstatetype.hpp */