xo-reader2 scaffold (fomo+arena version of xo-reader/) [WIP]

This commit is contained in:
Roland Conybeare 2026-01-18 17:59:46 -05:00
commit 15d9448d03
29 changed files with 1385 additions and 22 deletions

View file

@ -106,6 +106,7 @@ add_subdirectory(xo-ordinaltree)
#
add_subdirectory(xo-tokenizer2) # schematika tokenizer (fomo)
add_subdirectory(xo-expression2) # schematika expressions (fomo)
add_subdirectory(xo-reader2) # schematika expression parser (fomo)
add_subdirectory(xo-interpreter2) # schematika interpreter (fomo)
#
add_subdirectory(xo-webutil)

View file

@ -91,18 +91,13 @@ xo_add_genfacetimpl(
)
# ----------------------------------------------------------------
# header-only library
# shared library
add_subdirectory(src/expression2)
# ----------------------------------------------------------------
# cmake helper (for external xo-expression2 users)
xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
# ----------------------------------------------------------------
# input dependencies
#
# NOTE: dependency set here must be kept consistent with
# xo-expression2/cmake/xo_expression2Config.cmake.in
#xo_headeronly_dependency(${SELF_LIB} xo_flatstring)
# end CMakeLists.txt

View file

@ -22,20 +22,36 @@ add_definitions(${PROJECT_CXX_FLAGS})
#add_subdirectory(utest)
# ----------------------------------------------------------------
# header-only library
# note: manual target; generated code committed to git
xo_add_genfacet(
TARGET xo-reader2-facet-syntaxstatemachine
FACET SyntaxStateMachine
INPUT idl/SyntaxStateMachine.json5
OUTPUT_HPP_DIR include/xo/reader2
OUTPUT_IMPL_SUBDIR ssm
OUTPUT_CPP_DIR src/reader2
)
# note: manual target; generated code committed to git
xo_add_genfacetimpl(
TARGET xo-reader2-facetimpl-syntaxstatemachine-exprseqstate
FACET_PKG xo_reader2
FACET SyntaxStateMachine
REPR ExprSeqState
INPUT idl/ISyntaxStateMachine_DExprSeqState.json5
OUTPUT_HPP_DIR include/xo/reader2
OUTPUT_IMPL_SUBDIR ssm
OUTPUT_CPP_DIR src/reader2
)
# ----------------------------------------------------------------
# shared library
add_subdirectory(src/reader2)
# ----------------------------------------------------------------
# cmake helper (for external xo-reader2 users)
set(SELF_LIB xo_reader2)
xo_add_headeronly_library(${SELF_LIB})
xo_install_library4(${SELF_LIB} ${PROJECT_NAME}Targets)
xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
# ----------------------------------------------------------------
# input dependencies
#
# NOTE: dependency set here must be kept consistent with
# xo-reader2/cmake/xo_reader2Config.cmake.in
#xo_headeronly_dependency(${SELF_LIB} xo_flatstring)
# end CMakeLists.txt

12
xo-reader2/DESIGN.md Normal file
View file

@ -0,0 +1,12 @@
Uses arena allocators for fast+efficient parsing.
Composition of nested state machines.
## SyntaxStateMachine
a state machine dedicated to some particular Schematika syntax.
Examples: if-expression, type declaration, function call
## DExprSeqState
top-level expression sequence

View file

@ -6,7 +6,9 @@ include(CMakeFindDependencyMacro)
# must coordinate with xo_dependency() calls
# in CMakeLists.txt
#
#find_dependency(xo_flatstring)
find_dependency(xo_gc)
find_dependency(xo_tokenizer2)
find_dependency(xo_expression2)
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
check_required_components("@PROJECT_NAME@")

View file

@ -0,0 +1,13 @@
// genfacet input, "implementation" mode:
// describes DExprSeqState as a provider of the SyntaxStateMachine facet.
{
mode: "implementation",
// include directives for the generated implementation header
includes: [ "\"SyntaxStateMachine.hpp\"",
"\"ssm/ISyntaxStateMachine_Xfer.hpp\"" ],
local_types: [ ],
// generated code is placed in namespace xo::scm
namespace1: "xo",
namespace2: "scm",
// facet definition that this implementation refers to
facet_idl: "idl/SyntaxStateMachine.json5",
brief: "provide ASyntaxStateMachine interface for DExprSeqState",
using_doxygen: true,
// state (representation) type that supplies the facet methods
repr: "DExprSeqState",
doc: [ "implement ASyntaxStateMachine for DExprSeqState" ],
}

View file

@ -0,0 +1,56 @@
// genfacet input, "facet" mode:
// declares the SyntaxStateMachine facet (abstract interface + router)
// used by the schematika parser.
{
mode: "facet",
// includes in ASyntaxStateMachine.hpp
includes: [
"\"ParserStateMachine.hpp\"",
"\"syntaxstatetype.hpp\"",
"<xo/tokenizer2/Token.hpp>",
],
// extra includes in SyntaxStateMachine.hpp, if any
user_hpp_includes: [],
// generated code is placed in namespace xo::scm
namespace1: "xo",
namespace2: "scm",
// text after includes, before ASyntaxStateMachine
pretext: ["// {pretex} here"],
facet: "SyntaxStateMachine",
// generated detail headers go under this subdirectory (ssm/...)
detail_subdir: "ssm",
brief: "specialized state machine for parsing some particular schematika syntax",
using_doxygen: true,
doc: [
"Assistant to schematika parser dedicated to particular syntax"
],
types: [
// { name: string, doc: [ string ], definition: string },
],
// methods that do not modify the state machine
const_methods: [
{
name: "ssm_type",
doc: ["identify a type of syntax state machine"],
return_type: "syntaxstatetype",
args: [],
const: true,
noexcept: true,
attributes: [],
},
{
name: "get_expect_str",
doc: ["text describing expected/allowed input to this ssm in current state"],
return_type: "std::string_view",
args: [],
const: true,
noexcept: true,
attributes: [],
},
],
// methods that may modify the state machine
nonconst_methods: [
{
name: "on_if_token",
doc: ["update state machine for incoming if-keyword-token @p tk"],
return_type: "void",
args: [
{type: "const Token &", name: "tk"},
{type: "ParserStateMachine *", name: "p_psm"},
],
},
],
}

View file

@ -0,0 +1,76 @@
/** @file DExprSeqState.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "ParserStateMachine.hpp"
#include "SyntaxStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/facet/obj.hpp>
namespace xo {
namespace scm {
/** @enum exprseqtype
 *  @brief flavor of top-level expression sequence
 **/
enum class exprseqtype {
    /** top-level sequence for an interactive session.
     *  Accepts rvalue expressions in addition to
     *  definitions and declarations.
     **/
    toplevel_interactive = 0,

    /** top-level sequence for a non-interactive (batch) session.
     *  Accepts definitions and declarations.
     **/
    toplevel_batch,

    /** not a sequence type: number of valid enumerators. Keep last **/
    N
};
/** @class DExprSeqState
 *  @brief state machine for parsing a sequence of expressions
 *
 *  Similar to exprseq_xs in xo-expression.
 *  Exposes the SyntaxStateMachine facet methods
 *  (ssm_type, get_expect_str, on_if_token) as plain member functions.
 **/
class DExprSeqState {
public:
using AAllocator = xo::mm::AAllocator;
public:
/** create state for a sequence of kind @p ty **/
explicit DExprSeqState(exprseqtype ty);
/** start interactive top-level session:
 *  allocate a DExprSeqState from @p mm and push it onto @p p_psm
 **/
static void start_interactive(obj<AAllocator> mm,
ParserStateMachine * p_psm);
/** start non-interactive top-level session.
 *  NOTE: the implementation is currently disabled (compiled out
 *  under NOT_YET), so this call has no effect for now.
 **/
static void start_batch(obj<AAllocator> mm,
ParserStateMachine * p_psm);
public:
/** @defgroup scm-exprseq-ssm-facet syntaxstatemachine facet methods **/
///@{
/** identifies the ssm implemented here **/
syntaxstatetype ssm_type() const noexcept;
/** text describing expected/allowed input to this ssm in current state.
 *  Intended to drive error messages
 **/
std::string_view get_expect_str() const noexcept;
/** update state for this syntax on incoming if-keyword token @p tk,
 *  with overall parser state in @p p_psm
 **/
void on_if_token(const Token & tk, ParserStateMachine * p_psm);
///@}
private:
/** sequence type. Accept rvalue expressions when
 *  this is toplevel_interactive.
 *  Always accept definitions and declarations.
 **/
exprseqtype seqtype_;
};
} /*namespace scm*/
} /*namespace xo*/
/* end DExprSeqState.hpp */

View file

@ -0,0 +1,80 @@
/** @file ExpressionParser.hpp
 *
 *  @author Roland Conybeare, Jan 2026
 **/

#pragma once

#include "ExprState.hpp"
#include <xo/alloc2/Allocator.hpp>
#include <xo/alloc2/arena/IAllocator_DArena.hpp>
#include <xo/facet/obj.hpp>

namespace xo {
    namespace scm {
        /** @class ExpressionParser
         *  @brief Assemble Schematika expressions from token sequences
         *
         *  Parser represents each partially assembled expression by
         *  an ExprState object.
         *  Expressions form a tree:
         *  each expression belongs to at most one parent.
         **/
        class ExpressionParser {
        public:
            /* memory-management types live in xo::mm; alias them here
             * so member declarations below resolve from xo::scm
             */
            using AAllocator = xo::mm::AAllocator;
            using DArena = xo::mm::DArena;

        public:
            /** push expression state @p xstate onto the parsing stack **/
            void push_exprstate(obj<AExprState> xstate);

        private:
            /* TODO:
             *   ASymbolTable
             *     DLocalSymtab
             *     DGlobalSymtab
             *
             * Will also need
             *   DVariable
             *   DLambda
             *
             * For DGlobalSymtab perhaps use DArenaHashMap.
             * May also want to use DArenaHashMap+DArena to intern strings
             *
             * Also:
             *   TypeUnifier
             */

            /** Arena for internal parsing stack.
             *  Must be owned exclusively because destructively
             *  modified as parser completes parsing of each sub-expression
             *
             *  Contents will be a stack of ExprState instances
             **/
            DArena parser_alloc_;

#ifdef NOT_YET
            /** Arena for internal environment stack.
             *  This represents just nesting for environments.
             *  Details for each frame survive parsing and are
             *  stored in @ref expr_alloc_.
             *  Maybe that means we don't need env_alloc_
             **/
            DArena env_alloc_;
#endif

            /** Allocator for parsed expressions.
             *  Information available during subsequent execution
             *  (whether compiling or interpreting) must be stored here.
             *
             *  Also use this allocator for error messages arising
             *  during parsing
             *
             *  Memory use patterns for executions are not predictable,
             *  and require garbage collection, e.g. DX1Collector.
             *
             *  May alternatively be able to use DArena in a compile-only
             *  scenario, where top-level Expressions can be discarded
             *  once compiled.
             **/
            obj<AAllocator> expr_alloc_;
        };
    } /*namespace scm*/
} /*namespace xo*/

/* end ExpressionParser.hpp */

View file

@ -0,0 +1,52 @@
/** @file ParserResult.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include <xo/expression2/Expression.hpp>
#include <xo/object2/DString.hpp>
#include <string_view>
namespace xo {
namespace scm {
/** @enum parser_result_type
 *  @brief discriminates what a ParserResult carries
 **/
enum class parser_result_type {
    /** nothing to report yet: no input, or expression still incomplete **/
    none = 0,

    /** a complete expression is available **/
    expression,

    /** a parsing error is being reported **/
    error,

    /** not a result type: number of valid enumerators. Keep last **/
    N
};
/** @class ParserResult
 *  @brief output of the parser: nothing yet, an expression, or an error
 **/
class ParserResult {
public:
/** default result has type parser_result_type::none **/
ParserResult() = default;
/** create result with all fields given explicitly.
 *
 *  @param type  kind of result
 *  @param expr  parsed expression
 *  @param error_src_fn  name of the code location reporting an error
 *  @param error_description  error message
 *
 *  NOTE(review): @p error_src_fn is stored as a std::string_view,
 *  so the referenced characters must outlive this ParserResult.
 **/
ParserResult(parser_result_type type,
obj<AExpression> expr,
std::string_view error_src_fn,
const DString * error_description);
/** create ParserResult for a parsing error.
 *  Reporting detailed message @p errmsg
 *  from syntax state machine @p ssm
 **/
static ParserResult error(std::string_view ssm,
const DString * errmsg);
/** kind of result held **/
parser_result_type result_type() const { return result_type_; }
/** expression carried by this result **/
obj<AExpression> result_expr() const { return result_expr_; }
/** error message carried by this result; nullptr unless set **/
const DString * error_description() const { return error_description_; }
private:
/** kind of result; none until something is reported **/
parser_result_type result_type_ = parser_result_type::none;
/** parsed expression **/
obj<AExpression> result_expr_;
/** name of the code location that reported an error (non-owning view) **/
std::string_view error_src_fn_;
/** error message (non-owning pointer) **/
const DString * error_description_ = nullptr;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserResult.hpp */

View file

@ -0,0 +1,47 @@
/** @file ParserStack.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "SyntaxStateMachine.hpp"
#include <xo/alloc2/Allocator.hpp>
#include <xo/facet/obj.hpp>
namespace xo {
namespace scm {
/** @brief A stack of expression state machines
 *
 *  Each state machine is dedicated to a particular syntax instance.
 *  The innermost machine is held by this frame (see @ref top);
 *  machines for surrounding expressions are in progressively
 *  removed frames reached via @ref parent links.
 **/
class ParserStack {
public:
using AAllocator = xo::mm::AAllocator;
public:
/** create a frame holding @p ssm, with the rest of the stack in @p parent **/
ParserStack(obj<ASyntaxStateMachine> ssm, ParserStack * parent);
/** create new top of stack for syntax @p ssm, using memory from @p mm.
 *  Returns the newly created frame.
 **/
ParserStack * push(obj<AAllocator> mm,
obj<ASyntaxStateMachine> ssm);
/** syntax state machine held by this frame **/
obj<ASyntaxStateMachine> top() const noexcept { return ssm_; }
/** remainder of stack below this frame **/
ParserStack * parent() const noexcept { return parent_; }
private:
/** top of parsing stack: always non-null **/
obj<ASyntaxStateMachine> ssm_;
/** remainder of parsing stack excluding top **/
ParserStack * parent_ = nullptr;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStack.hpp */

View file

@ -0,0 +1,115 @@
/** @file ParserStateMachine.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include "ParserResult.hpp"
#include <xo/tokenizer2/Token.hpp>
#include <xo/alloc2/Allocator.hpp>
#include <xo/arena/DArena.hpp>
namespace xo {
namespace scm {
// defined in ssm/ASyntaxStateMachine.hpp, but
// including here would create include cycle
//
class ASyntaxStateMachine;
// note: load-bearing to forward-declare ParserStack,
// because ASyntaxStateMachine.hpp includes ParserStateMachine.hpp;
// before obj<SyntaxStateMachine> is defined.
class ParserStack;
/** @brief State machine embodying Schematika parser
 *
 *  Receives tokens one at a time via @ref on_token and forwards each
 *  to the syntax state machine at the top of @ref stack_.
 *  Output (currently: errors) is recorded in @ref result_.
 **/
class ParserStateMachine {
public:
using AAllocator = xo::mm::AAllocator;
using ArenaConfig = xo::mm::ArenaConfig;
using DArena = xo::mm::DArena;
public:
/** create parser state machine from arena configuration @p config
 *  (presumably for @ref parser_alloc_ -- constructor body not visible here)
 **/
ParserStateMachine(const ArenaConfig & config);
/** @defgroup scm-parserstatemachine-bookkeeping bookkeeping methods **/
///@{
/** push syntax @p ssm onto @ref stack_ **/
void push_ssm(obj<ASyntaxStateMachine> ssm);
///@}
/** @defgroup scm-parserstatemachine-inputmethods input methods **/
///@{
/** update state to respond to input token @p tk.
 *  record output (if any) in @ref result_.
 *  Throws std::runtime_error if the parsing stack is empty,
 *  or for token types that are not handled yet.
 **/
void on_token(const Token & tk);
/** update state for incoming if-token @p tk:
 *  forwards to the syntax state machine on top of @ref stack_
 **/
void on_if_token(const Token & tk);
///@}
/** @defgroup scm-parserstatemachine-error-entrypoints error entry points **/
///@{
/** capture error message @p errmsg from @p ssm_name,
 *  as current state machine output.
 *
 *  @p errmsg will have been allocated from the @ref expr_alloc_ allocator
 **/
void capture_error(std::string_view ssm_name,
const DString * errmsg);
/** report illegal input from syntax state machine @p ssm_name
 *  recognized on input token @p tk. @p expect_str describes
 *  expected input in that state
 **/
void illegal_input_on_token(std::string_view ssm_name,
const Token & tk,
std::string_view expect_str);
///@}
private:
/** Arena for internal parsing stack.
 *  Must be owned exclusively because destructively
 *  modified as parser completes parsing of each sub-expression
 *
 *  Contents: the frames of @ref stack_
 **/
DArena parser_alloc_;
/** parser stack. Memory from @ref parser_alloc_.
 *  nullptr while the stack is empty
 **/
ParserStack * stack_ = nullptr;
/** Allocator for parsed expressions.
 *  Information available during subsequent execution
 *  (whether compiling or interpreting) must be stored here.
 *
 *  Also use this allocator for error messages arising
 *  during parsing
 *
 *  Memory use patterns for executions are not predictable,
 *  and benefit from garbage collection, e.g. DX1Collector.
 *
 *  May alternatively be able to use DArena in a compile-only
 *  scenario, where top-level Expressions can be discarded
 *  once compiled.
 **/
obj<AAllocator> expr_alloc_;
/** current output from parser **/
ParserResult result_;
/** true to enable debug output **/
bool debug_flag_ = false;
};
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStateMachine.hpp */

View file

@ -0,0 +1,25 @@
/** @file Reader.hpp
 *
 *  @author Roland Conybeare, Jan 2026
 **/

#pragma once

#include "ExpressionParser.hpp"
#include <xo/tokenizer2/Tokenizer.hpp>

namespace xo {
    namespace scm {
        /** @class Reader
         *  @brief Assemble Schematika expressions from lexical tokens
         *
         *  Combines a tokenizer (text -> tokens) with an expression
         *  parser (tokens -> expressions). Scaffold: no public API yet.
         **/
        class Reader {
        public:
        private:
            /** tokenizer: assembles Schematika tokens from text **/
            Tokenizer tokenizer_;
            /** parser: assemble Schematika expressions from token sequences **/
            ExpressionParser parser_;
        };
    } /*namespace scm*/
} /*namespace xo*/

/* end Reader.hpp */

View file

@ -0,0 +1,22 @@
/** @file SyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for facet .hpp file:
* [facet.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ssm/ASyntaxStateMachine.hpp"
#include "ssm/ISyntaxStateMachine_Any.hpp"
#include "ssm/ISyntaxStateMachine_Xfer.hpp"
#include "ssm/RSyntaxStateMachine.hpp"
/* end SyntaxStateMachine.hpp */

View file

@ -0,0 +1,78 @@
/** @file ASyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [abstract_facet.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
// includes (via {facet_includes})
#include "ParserStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/tokenizer2/Token.hpp>
#include <xo/facet/obj.hpp>
#include <xo/facet/facet_implementation.hpp>
#include <xo/facet/typeseq.hpp>
// {pretex} here
namespace xo {
namespace scm {
using Copaque = const void *;
using Opaque = void *;
/**
Assistant to schematika parser dedicated to particular syntax

Abstract facet interface. State is not stored in the interface object:
every method (except _typeseq) receives the state as an opaque pointer
in its @p data argument.
**/
class ASyntaxStateMachine {
public:
/** @defgroup scm-syntaxstatemachine-type-traits **/
///@{
// types
/** integer identifying a type **/
using typeseq = xo::facet::typeseq;
/** type-erased pointer to read-only state **/
using Copaque = const void *;
/** type-erased pointer to mutable state **/
using Opaque = void *;
///@}
/** @defgroup scm-syntaxstatemachine-methods **/
///@{
// const methods
/** RTTI: unique id# for actual runtime data representation **/
virtual typeseq _typeseq() const noexcept = 0;
/** identify a type of syntax state machine **/
virtual syntaxstatetype ssm_type(Copaque data) const noexcept = 0;
/** text describing expected/allowed input to this ssm in current state **/
virtual std::string_view get_expect_str(Copaque data) const noexcept = 0;
// nonconst methods
/** update state machine for incoming if-keyword-token @p tk **/
virtual void on_if_token(Opaque data, const Token & tk, ParserStateMachine * p_psm) = 0;
///@}
}; /*ASyntaxStateMachine*/
/** Implementation ISyntaxStateMachine_DRepr of ASyntaxStateMachine for state DRepr
* should provide a specialization:
*
* template <>
* struct xo::facet::FacetImplementation<ASyntaxStateMachine, DRepr> {
* using Impltype = ISyntaxStateMachine_DRepr;
* };
*
* then ISyntaxStateMachine_ImplType<DRepr> --> ISyntaxStateMachine_DRepr
**/
template <typename DRepr>
using ISyntaxStateMachine_ImplType = xo::facet::FacetImplType<ASyntaxStateMachine, DRepr>;
} /*namespace scm*/
} /*namespace xo*/
/* ASyntaxStateMachine.hpp */

View file

@ -0,0 +1,87 @@
/** @file ISyntaxStateMachine_Any.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ASyntaxStateMachine.hpp"
#include <xo/facet/obj.hpp>
namespace xo { namespace scm { class ISyntaxStateMachine_Any; } }
namespace xo {
namespace facet {
template <>
struct FacetImplementation<xo::scm::ASyntaxStateMachine,
DVariantPlaceholder>
{
using ImplType = xo::scm::ISyntaxStateMachine_Any;
};
}
}
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_Any
 *  @brief ASyntaxStateMachine implementation for empty variant instance
 *
 *  Every facet method ends in _fatal(): calling a facet method
 *  through this implementation is an error.
 **/
class ISyntaxStateMachine_Any : public ASyntaxStateMachine {
public:
/** @defgroup scm-syntaxstatemachine-any-type-traits **/
///@{
/** integer identifying a type **/
using typeseq = xo::facet::typeseq;
///@}
/** @defgroup scm-syntaxstatemachine-any-methods **/
///@{
/** this object viewed through the abstract facet interface **/
const ASyntaxStateMachine * iface() const { return std::launder(this); }
// from ASyntaxStateMachine
// const methods
typeseq _typeseq() const noexcept override { return s_typeseq; }
[[noreturn]] syntaxstatetype ssm_type(Copaque) const noexcept override { _fatal(); }
[[noreturn]] std::string_view get_expect_str(Copaque) const noexcept override { _fatal(); }
// nonconst methods
[[noreturn]] void on_if_token(Opaque, const Token &, ParserStateMachine *) override;
///@}
private:
/** @defgroup scm-syntaxstatemachine-any-private-methods **/
///@{
/** report misuse on stderr, then terminate the program **/
[[noreturn]] static void _fatal();
///@}
public:
/** @defgroup scm-syntaxstatemachine-any-member-vars **/
///@{
/** typeseq for the variant placeholder type **/
static typeseq s_typeseq;
/** result of the facet-implementation validity check **/
static bool _valid;
///@}
};
} /*namespace scm */
} /*namespace xo */
/* ISyntaxStateMachine_Any.hpp */

View file

@ -0,0 +1,64 @@
/** @file ISyntaxStateMachine_DExprSeqState.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/ISyntaxStateMachine_DExprSeqState.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_repr.hpp.j2]
* 3. idl for facet methods
* [idl/ISyntaxStateMachine_DExprSeqState.json5]
**/
#pragma once
#include "SyntaxStateMachine.hpp"
#include "SyntaxStateMachine.hpp"
#include "ssm/ISyntaxStateMachine_Xfer.hpp"
#include "DExprSeqState.hpp"
namespace xo { namespace scm { class ISyntaxStateMachine_DExprSeqState; } }
namespace xo {
namespace facet {
template <>
struct FacetImplementation<xo::scm::ASyntaxStateMachine,
xo::scm::DExprSeqState>
{
using ImplType = xo::scm::ISyntaxStateMachine_Xfer
<xo::scm::DExprSeqState,
xo::scm::ISyntaxStateMachine_DExprSeqState>;
};
}
}
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_DExprSeqState
 *  @brief SyntaxStateMachine facet methods for state type DExprSeqState
 *
 *  Static functions only; each receives the DExprSeqState instance
 *  explicitly as @p self. Plugged into ISyntaxStateMachine_Xfer to
 *  obtain the ASyntaxStateMachine implementation for DExprSeqState.
 **/
class ISyntaxStateMachine_DExprSeqState {
public:
/** @defgroup scm-syntaxstatemachine-dexprseqstate-type-traits **/
///@{
using Copaque = xo::scm::ASyntaxStateMachine::Copaque;
using Opaque = xo::scm::ASyntaxStateMachine::Opaque;
///@}
/** @defgroup scm-syntaxstatemachine-dexprseqstate-methods **/
///@{
// const methods
/** identify a type of syntax state machine **/
static syntaxstatetype ssm_type(const DExprSeqState & self) noexcept;
/** text describing expected/allowed input to this ssm in current state **/
static std::string_view get_expect_str(const DExprSeqState & self) noexcept;
// non-const methods
/** update state machine for incoming if-keyword-token @p tk **/
static void on_if_token(DExprSeqState & self, const Token & tk, ParserStateMachine * p_psm);
///@}
};
} /*namespace scm*/
} /*namespace xo*/
/* end */

View file

@ -0,0 +1,88 @@
/** @file ISyntaxStateMachine_Xfer.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ParserStateMachine.hpp"
#include "syntaxstatetype.hpp"
#include <xo/tokenizer2/Token.hpp>
namespace xo {
namespace scm {
/** @class ISyntaxStateMachine_Xfer
 *  @brief adapter from ASyntaxStateMachine virtual methods to typed static functions
 *
 *  Each virtual method casts its opaque data pointer to @p DRepr
 *  and forwards to the same-named static function of
 *  @p ISyntaxStateMachine_DRepr.
 *
 *  @tparam DRepr  state (representation) type
 *  @tparam ISyntaxStateMachine_DRepr  class providing the static functions
 **/
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
class ISyntaxStateMachine_Xfer : public ASyntaxStateMachine {
public:
/** @defgroup scm-syntaxstatemachine-xfer-type-traits **/
///@{
/** actual implementation (not generated; often delegates to DRepr) **/
using Impl = ISyntaxStateMachine_DRepr;
/** integer identifying a type **/
using typeseq = ASyntaxStateMachine::typeseq;
///@}
/** @defgroup scm-syntaxstatemachine-xfer-methods **/
///@{
/** recover typed state from opaque pointer @p d.
 *  Unchecked cast: @p d must point to a DRepr
 **/
static const DRepr & _dcast(Copaque d) { return *(const DRepr *)d; }
static DRepr & _dcast(Opaque d) { return *(DRepr *)d; }
// from ASyntaxStateMachine
// const methods
typeseq _typeseq() const noexcept override { return s_typeseq; }
syntaxstatetype ssm_type(Copaque data) const noexcept override {
return I::ssm_type(_dcast(data));
}
std::string_view get_expect_str(Copaque data) const noexcept override {
return I::get_expect_str(_dcast(data));
}
// non-const methods
void on_if_token(Opaque data, const Token & tk, ParserStateMachine * p_psm) override {
return I::on_if_token(_dcast(data), tk, p_psm);
}
///@}
private:
/** shorthand for @ref Impl, used by the forwarding methods above **/
using I = Impl;
public:
/** @defgroup scm-syntaxstatemachine-xfer-member-vars **/
///@{
/** typeseq for template parameter DRepr **/
static typeseq s_typeseq;
/** true iff satisfies facet implementation **/
static bool _valid;
///@}
};
// definition of s_typeseq: id assigned to DRepr
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
xo::facet::typeseq
ISyntaxStateMachine_Xfer<DRepr, ISyntaxStateMachine_DRepr>::s_typeseq
= xo::facet::typeseq::id<DRepr>();
// definition of _valid: result of the facet-implementation check
template <typename DRepr, typename ISyntaxStateMachine_DRepr>
bool
ISyntaxStateMachine_Xfer<DRepr, ISyntaxStateMachine_DRepr>::_valid
= xo::facet::valid_facet_implementation<ASyntaxStateMachine,
ISyntaxStateMachine_Xfer>();
} /*namespace scm */
} /*namespace xo*/
/* end ISyntaxStateMachine_Xfer.hpp */

View file

@ -0,0 +1,85 @@
/** @file RSyntaxStateMachine.hpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/SyntaxStateMachine.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/SyntaxStateMachine.json5]
**/
#pragma once
#include "ASyntaxStateMachine.hpp"
namespace xo {
namespace scm {
/** @class RSyntaxStateMachine
 *  @brief router: gives an object wrapper the SyntaxStateMachine methods
 *
 *  Each method forwards to the facet interface obtained from
 *  O::iface(), passing the wrapped state O::data().
 *
 *  @tparam Object  wrapper type supplying iface() and data()
 **/
template <typename Object>
class RSyntaxStateMachine : public Object {
private:
using O = Object;
public:
/** @defgroup scm-syntaxstatemachine-router-type-traits **/
///@{
using ObjectType = Object;
using DataPtr = Object::DataPtr;
using typeseq = xo::reflect::typeseq;
///@}
/** @defgroup scm-syntaxstatemachine-router-ctors **/
///@{
RSyntaxStateMachine() {}
RSyntaxStateMachine(Object::DataPtr data) : Object{std::move(data)} {}
/** construct from explicit interface + opaque data.
 *  Only available when Object holds the variant placeholder type
 **/
RSyntaxStateMachine(const ASyntaxStateMachine * iface, void * data)
requires std::is_same_v<typename Object::DataType, xo::facet::DVariantPlaceholder>
: Object(iface, data) {}
///@}
/** @defgroup scm-syntaxstatemachine-router-methods **/
///@{
// const methods
typeseq _typeseq() const noexcept { return O::iface()->_typeseq(); }
syntaxstatetype ssm_type() const noexcept {
return O::iface()->ssm_type(O::data());
}
std::string_view get_expect_str() const noexcept {
return O::iface()->get_expect_str(O::data());
}
// non-const methods
// NOTE(review): earlier comment said these are "still const in router",
// but on_if_token is declared without const -- confirm generator intent
void on_if_token(const Token & tk, ParserStateMachine * p_psm) {
return O::iface()->on_if_token(O::data(), tk, p_psm);
}
///@}
/** @defgroup scm-syntaxstatemachine-member-vars **/
///@{
/** result of the object-router validity check for Object **/
static bool _valid;
///@}
};
template <typename Object>
bool
RSyntaxStateMachine<Object>::_valid = xo::facet::valid_object_router<Object>();
} /*namespace scm*/
} /*namespace xo*/
namespace xo { namespace facet {
template <typename Object>
struct RoutingFor<xo::scm::ASyntaxStateMachine, Object> {
using RoutingType = xo::scm::RSyntaxStateMachine<Object>;
};
} }
/* end RSyntaxStateMachine.hpp */

View file

@ -0,0 +1,38 @@
/** @file syntaxstatetype.hpp
*
* @author Roland Conybeare, Jan 2026
**/
#pragma once
#include <ostream>
namespace xo {
namespace scm {
/** @enum syntaxstatetype
 *  @brief Label a specialized parsing state machine
 *
 *  Label for a schematika syntax state machine
 *  dedicated to some particular piece of syntax
 **/
enum class syntaxstatetype {
    invalid = -1,

    /** toplevel of some translation unit. See @ref DExprSeqState **/
    expect_toplevel_expression_sequence,

    /** comes last, counts number of valid enums **/
    N
};

/** String representation for enum value @p x.
 *
 *  Returns the enumerator name for valid values,
 *  and "?syntaxstatetype" for anything else (including the
 *  @c invalid and @c N sentinels).
 *
 *  Defined inline: no translation unit in this library provides
 *  an out-of-line definition, so a declaration alone would leave
 *  operator<< below with an unresolved symbol.
 **/
inline const char *
syntaxstatetype_descr(syntaxstatetype x)
{
    switch (x) {
    case syntaxstatetype::expect_toplevel_expression_sequence:
        return "expect_toplevel_expression_sequence";
    case syntaxstatetype::invalid:
    case syntaxstatetype::N:
        break;
    }

    return "?syntaxstatetype";
}

/** print description of @p x on stream @p os **/
inline std::ostream &
operator<< (std::ostream & os, syntaxstatetype x) {
    os << syntaxstatetype_descr(x);
    return os;
}
}
} /*namespace xo*/
/* end syntaxstatetype.hpp */

View file

@ -0,0 +1,30 @@
# reader2/CMakeLists.txt
set(SELF_LIB xo_reader2)
set(SELF_SRCS
#init_reader2.cpp
ParserStateMachine.cpp
ParserStack.cpp
ParserResult.cpp
ISyntaxStateMachine_Any.cpp
DExprSeqState.cpp
ISyntaxStateMachine_DExprSeqState.cpp
#reader2_register_facets.cpp
#reader2_register_types.cpp
)
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
# note: deps here must also appear in cmake/xo_reader2Config.cmake.in
xo_dependency(${SELF_LIB} xo_gc)
xo_dependency(${SELF_LIB} xo_tokenizer2)
xo_dependency(${SELF_LIB} xo_expression2)
#xo_dependency(${SELF_LIB} reflect)
#xo_dependency(${SELF_LIB} xo_object2)
#xo_dependency(${SELF_LIB} xo_printable2)
#xo_dependency(${SELF_LIB} xo_flatstring)
#xo_dependency(${SELF_LIB} subsys)
#xo_dependency(${SELF_LIB} indentlog)

View file

@ -0,0 +1,101 @@
/** @file DExprSeqState.cpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "DExprSeqState.hpp"
#include "ssm/ISyntaxStateMachine_DExprSeqState.hpp"
namespace xo {
using xo::mm::AAllocator;
using xo::facet::with_facet;
using xo::reflect::typeseq;
namespace scm {
/* remember which kind of top-level sequence this state represents */
DExprSeqState::DExprSeqState(exprseqtype ty)
    : seqtype_(ty)
{
}
namespace {
    /* Build a DExprSeqState of kind seqtype in memory obtained from mm,
     * and return it wrapped as a syntax state machine object.
     */
    obj<ASyntaxStateMachine>
    make_exprseq_ssm(obj<AAllocator> mm, exprseqtype seqtype)
    {
        // raw storage for one DExprSeqState, tagged with its type id
        void * storage = mm.alloc(typeseq::id<DExprSeqState>(),
                                  sizeof(DExprSeqState));

        // construct the state in place
        auto * state = new (storage) DExprSeqState(seqtype);

        return with_facet<ASyntaxStateMachine>::mkobj(state);
    }
}
/* Begin an interactive top-level session:
 * create a toplevel_interactive sequence state from mm,
 * and make it the top of the parser stack in *p_psm.
 */
void
DExprSeqState::start_interactive(obj<AAllocator> mm,
                                 ParserStateMachine * p_psm)
{
    obj<ASyntaxStateMachine> toplevel
        = make_exprseq_ssm(mm, exprseqtype::toplevel_interactive);

    p_psm->push_ssm(toplevel);
}
/* Begin a non-interactive top-level session.
 *
 * Currently a no-op: the push of a toplevel_batch sequence state
 * is compiled out until NOT_YET is defined.
 */
void
DExprSeqState::start_batch(obj<AAllocator> mm,
                           ParserStateMachine * p_psm)
{
    // arguments unused while the implementation is disabled
    static_cast<void>(mm);
    static_cast<void>(p_psm);

#ifdef NOT_YET
    p_psm->push_ssm(make_exprseq_ssm(mm,
                                     exprseqtype::toplevel_batch));
#endif
}
// SyntaxStateMachine facet methods
/* this state machine always handles a top-level expression sequence */
syntaxstatetype
DExprSeqState::ssm_type() const noexcept
{
    constexpr syntaxstatetype c_kind
        = syntaxstatetype::expect_toplevel_expression_sequence;

    return c_kind;
}
/* Describe the input accepted in the current state.
 * The returned view refers to a string literal.
 */
std::string_view
DExprSeqState::get_expect_str() const noexcept
{
    // TODO: provisional. Will expand as more syntax implemented

    if (seqtype_ == exprseqtype::toplevel_interactive)
        return "def|expression|...";

    if (seqtype_ == exprseqtype::toplevel_batch)
        return "def|...";

    // exprseqtype::N (or anything else) is not a real sequence type
    assert(false);

    return "impossible-DExprSeqState::get_expr_str";
}
/* React to an if-keyword token tk at top level.
 *
 * - batch sequence: report illegal input to *p_psm
 * - interactive sequence: not implemented yet (asserts)
 */
void
DExprSeqState::on_if_token(const Token & tk,
                           ParserStateMachine * p_psm)
{
    switch (seqtype_) {
    case exprseqtype::toplevel_batch:
        p_psm->illegal_input_on_token("DExprSeqState::on_if_token",
                                      tk,
                                      this->get_expect_str());
        return;

    case exprseqtype::toplevel_interactive:
        // placeholder; intended to become: DfElseState::start(p_psm);
    case exprseqtype::N:
        // N only counts enumerators; unreachable as a sequence type
        assert(false);
        return;
    }
}
} /*namespace scm*/
} /*namespace xo*/
/* end DExprSeqState.cpp */

View file

@ -0,0 +1,47 @@
/** @file ISyntaxStateMachine_Any.cpp
*
**/
#include "ssm/ISyntaxStateMachine_Any.hpp"
#include <iostream>
namespace xo {
namespace scm {
using xo::facet::DVariantPlaceholder;
using xo::facet::typeseq;
using xo::facet::valid_facet_implementation;
/* Report misuse on stderr, then terminate. Never returns. */
void
ISyntaxStateMachine_Any::_fatal()
{
    /* control here when a facet method is invoked through an
     * uninitialized ISyntaxStateMachine_Any.
     * Initialized instance will have specific implementation type
     */
    std::cerr << "fatal"
                 ": attempt to call uninitialized"
                 " ISyntaxStateMachine_Any method"
              << std::endl;

    std::terminate();
}
// type id reported by _typeseq(): that of the variant placeholder type
typeseq
ISyntaxStateMachine_Any::s_typeseq = typeseq::id<DVariantPlaceholder>();
// result of checking ISyntaxStateMachine_Any as an ASyntaxStateMachine implementation
bool
ISyntaxStateMachine_Any::_valid
= valid_facet_implementation<ASyntaxStateMachine, ISyntaxStateMachine_Any>();
// nonconst methods
auto
ISyntaxStateMachine_Any::on_if_token(Opaque, const Token &, ParserStateMachine *) -> void
{
_fatal();
}
} /*namespace scm*/
} /*namespace xo*/
/* end ISyntaxStateMachine_Any.cpp */

View file

@ -0,0 +1,39 @@
/** @file ISyntaxStateMachine_DExprSeqState.cpp
*
* Generated automagically from ingredients:
* 1. code generator:
* [/home/roland/proj/xo-umbrella2-claude1/xo-facet/codegen/genfacet]
* arguments:
* --input [idl/ISyntaxStateMachine_DExprSeqState.json5]
* 2. jinja2 template for abstract facet .hpp file:
* [iface_facet_any.hpp.j2]
* 3. idl for facet methods
* [idl/ISyntaxStateMachine_DExprSeqState.json5]
**/
#include "ssm/ISyntaxStateMachine_DExprSeqState.hpp"
namespace xo {
namespace scm {
/* facet thunk: delegate to DExprSeqState::ssm_type() */
syntaxstatetype
ISyntaxStateMachine_DExprSeqState::ssm_type(const DExprSeqState & self) noexcept
{
    return self.ssm_type();
}
/* facet thunk: delegate to DExprSeqState::get_expect_str() */
std::string_view
ISyntaxStateMachine_DExprSeqState::get_expect_str(const DExprSeqState & self) noexcept
{
    return self.get_expect_str();
}
auto
ISyntaxStateMachine_DExprSeqState::on_if_token(DExprSeqState & self, const Token & tk, ParserStateMachine * p_psm) -> void
{
self.on_if_token(tk, p_psm);
}
} /*namespace scm*/
} /*namespace xo*/
/* end ISyntaxStateMachine_DExprSeqState.cpp */

View file

@ -0,0 +1,32 @@
/** @file ParserResult.cpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "ParserResult.hpp"
namespace xo {
namespace scm {
/* Store all four fields as given.
 * error_src_fn is kept as a view; error_description as a plain pointer.
 */
ParserResult::ParserResult(parser_result_type type,
                           obj<AExpression> expr,
                           std::string_view error_src_fn,
                           const DString * error_description)
    : result_type_{type}
    , result_expr_{expr}
    , error_src_fn_{error_src_fn}
    , error_description_{error_description}
{
}
/* Build an error result: no expression, source ssm_name, message errmsg */
ParserResult
ParserResult::error(std::string_view ssm_name,
                    const DString * errmsg)
{
    ParserResult failure(parser_result_type::error,
                         obj<AExpression>(),
                         ssm_name,
                         errmsg);

    return failure;
}
} /*namespace scm*/
} /*namespace xo*/
/* end ParserResult.cpp */

View file

@ -0,0 +1,28 @@
/** @file ParserStack.cpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "ParserStack.hpp"
#include "SyntaxStateMachine.hpp"
namespace xo {
using xo::facet::typeseq;
namespace scm {
/* Create a new top-of-stack frame for ssm, using memory from mm.
 *
 * The frame on which push() is invoked becomes the parent of the
 * new frame, so the existing stack stays reachable through parent().
 * Returns the new frame.
 */
ParserStack *
ParserStack::push(obj<AAllocator> mm,
                  obj<ASyntaxStateMachine> ssm)
{
    void * mem = mm.alloc(typeseq::id<ParserStack>(),
                          sizeof(ParserStack));

    // link to this frame, not to parent_:
    // using parent_ would drop the current top from the stack
    return new (mem) ParserStack(ssm, this);
}
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStack.cpp */

View file

@ -0,0 +1,134 @@
/** @file ParserStateMachine.cpp
*
* @author Roland Conybeare, Jan 2026
**/
#include "ParserStateMachine.hpp"
#include "ParserStack.hpp"
#include "SyntaxStateMachine.hpp"
#include <xo/alloc2/arena/IAllocator_DArena.hpp>
#include <xo/facet/typeseq.hpp>
#include <xo/indentlog/scope.hpp>
#include <xo/indentlog/print/tostr.hpp>
#include <xo/indentlog/print/tag.hpp>
#include <new>
#include <stdexcept>
namespace xo {
using xo::facet::with_facet;
namespace scm {
/* Make ssm the new top of the parser stack.
 *
 * Frames are allocated from parser_alloc_. Works on an empty stack:
 * the first frame is created directly with no parent, instead of
 * calling a member function through a null stack_ pointer.
 */
void
ParserStateMachine::push_ssm(obj<ASyntaxStateMachine> ssm)
{
    scope log(XO_DEBUG(debug_flag_));

    // note: using parser_alloc_ for parser stack, since stacklike behavior
    auto arena = with_facet<AAllocator>::mkobj(&parser_alloc_);
    obj<AAllocator> mm = arena;

    if (stack_) {
        this->stack_ = stack_->push(mm, ssm);
    } else {
        // empty stack: build the bottom frame here
        void * mem = mm.alloc(xo::facet::typeseq::id<ParserStack>(),
                              sizeof(ParserStack));

        this->stack_ = new (mem) ParserStack(ssm, nullptr);
    }
}
/* Dispatch input token tk by token type.
 *
 * Only the if-keyword is handled so far; every other token type
 * throws std::runtime_error("NOT IMPLEMENTED ...").
 * Also throws std::runtime_error when the parsing stack is empty.
 */
void
ParserStateMachine::on_token(const Token & tk)
{
scope log(XO_DEBUG(debug_flag_), xtag("tk", tk));
if (!stack_) {
// parsing stack should always have toplevel expression sequence
throw std::runtime_error(tostr("unexpected empty parsing stack",
xtag("token", tk),
xtag("help", "do it the same. but better!")
));
}
// every enumerator is listed explicitly (no default label)
switch (tk.tk_type()) {
case tokentype::tk_if:
this->on_if_token(tk);
break;
// all the not-yet handled cases
case tokentype::tk_invalid:
case tokentype::tk_bool:
case tokentype::tk_i64:
case tokentype::tk_f64:
case tokentype::tk_string:
case tokentype::tk_symbol:
case tokentype::tk_leftparen:
case tokentype::tk_rightparen:
case tokentype::tk_leftbracket:
case tokentype::tk_rightbracket:
case tokentype::tk_leftbrace:
case tokentype::tk_rightbrace:
case tokentype::tk_leftangle:
case tokentype::tk_rightangle:
case tokentype::tk_lessequal:
case tokentype::tk_greatequal:
case tokentype::tk_dot:
case tokentype::tk_comma:
case tokentype::tk_colon:
case tokentype::tk_doublecolon:
case tokentype::tk_semicolon:
case tokentype::tk_singleassign:
case tokentype::tk_assign:
case tokentype::tk_yields:
case tokentype::tk_plus:
case tokentype::tk_minus:
case tokentype::tk_star:
case tokentype::tk_slash:
case tokentype::tk_cmpeq:
case tokentype::tk_cmpne:
case tokentype::tk_type:
case tokentype::tk_def:
case tokentype::tk_lambda:
case tokentype::tk_then:
case tokentype::tk_else:
case tokentype::tk_let:
case tokentype::tk_in:
case tokentype::tk_end:
case tokentype::N:
throw std::runtime_error(tostr("NOT IMPLEMENTED",
xtag("token", tk)));
}
}
/* Forward if-keyword token tk to the syntax state machine
 * on top of the parser stack.
 * Precondition: stack_ is non-null (on_token checks this before dispatch).
 */
void
ParserStateMachine::on_if_token(const Token & tk)
{
    scope log(XO_DEBUG(debug_flag_), xtag("tk", tk));

    auto innermost = stack_->top();

    innermost.on_if_token(tk, this);
}
void
ParserStateMachine::capture_error(std::string_view ssm_name,
const DString * errmsg)
{
this->result_ = ParserResult::error(ssm_name, errmsg);
}
/* Record an "unexpected token" error as the current parser output.
 *
 * The message names the offending token tk, the expected input
 * expect_str and the reporting state machine ssm_name; it is copied
 * into a DString obtained via expr_alloc_.
 */
void
ParserStateMachine::illegal_input_on_token(std::string_view ssm_name,
                                           const Token & tk,
                                           std::string_view expect_str)
{
    // TODO:
    // - want to write error message using DArena
    // - need something like log_streambuf and/or tostr() that's arena-aware

    // step 1: format message into a temporary string
    auto text = tostr("Unexpected token for parsing state",
                      xtag("token", tk),
                      xtag("expecting", expect_str),
                      xtag("ssm", ssm_name),
                      xtag("via", "ParserStateMachine::illegal_input_on_token"));

    // step 2: copy into storage from expr_alloc_
    std::string_view text_view(text);
    auto message = DString::from_view(expr_alloc_, text_view);

    // step 3: publish as parser output
    this->capture_error(ssm_name, message);
}
} /*namespace scm*/
} /*namespace xo*/
/* end ParserStateMachine.cpp */

View file

@ -164,8 +164,8 @@ namespace xo {
/** keyword @c 'end' **/
tk_end,
/** counts number of entries **/
n_tokentype
/** comes last, counts number of entries **/
N
}; /*tokentype*/
/** String representation for enum value.

View file

@ -60,7 +60,7 @@ namespace xo {
CASE(tk_end);
case tokentype::tk_invalid:
case tokentype::n_tokentype:
case tokentype::N:
return "?tokentype";
}