diff --git a/include/xo/reader2/ParserStateMachine.hpp b/include/xo/reader2/ParserStateMachine.hpp index 339fa029..cae4a551 100644 --- a/include/xo/reader2/ParserStateMachine.hpp +++ b/include/xo/reader2/ParserStateMachine.hpp @@ -33,12 +33,25 @@ namespace xo { public: ParserStateMachine(const ArenaConfig & config); + /** @defgroup scm-parserstatemachine-accessors accessor methods **/ + ///@{ + + bool debug_flag() const noexcept { return debug_flag_; } + ParserStack * stack() const noexcept { return stack_; } + const ParserResult & result() const noexcept { return result_; } + obj expr_alloc() const noexcept { return expr_alloc_; } + + ///@} + /** @defgroup scm-parserstatemachine-bookkeeping bookkeeping methods **/ ///@{ /** push syntax @p ssm onto @ref stack_ **/ void push_ssm(obj ssm); + /** reset result to none **/ + void reset_result() { result_ = ParserResult(); } + ///@} /** @defgroup scm-parserstatemachine-inputmethods input methods **/ diff --git a/include/xo/reader2/SchematikaParser.hpp b/include/xo/reader2/SchematikaParser.hpp new file mode 100644 index 00000000..8b4688ce --- /dev/null +++ b/include/xo/reader2/SchematikaParser.hpp @@ -0,0 +1,231 @@ +/** @file SchematikaParser.hpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#pragma once + +#include "ParserStateMachine.hpp" +#include "ParserResult.hpp" +#include + +namespace xo { + namespace scm { + /** schematica parser + * + * Examples: + * + * decltype point + * + * // forward declarations + * decl pi : f64; + * decl fib(n : i32) -> i32; + * + * def pi = 3.14159265; // constant. = is single assignment + * + * def fib(n : i32) -> i32 { + * // nested defs ok + * def aux(n : i32, s1 : i32, s2 : i32) -> i32 { + * // or: + * // (n == 0) ? s1 : aux(n - 1, s1 + s2, s1) + * // + * if (n == 0) { + * s1; + * } else { + * aux(n - 1, s1 + s2, s1); + * } + * + * // or: + * // if (n == 0) ? s1 : aux(n - 1, s1 + s2, s1) + * } + * + * aux(n=n, s1=1, s2=0); + * } + * + * def x := "fu"; // non-constant + * x += "bar"; + * + * def anotherfib = lambda(n : i32) { fib(n) }; + * + * def any : object; + * def l : list = '(); + * + * deftype point :: {x : f64, y : f64}; + * deftype polar :: {arg : f64, mag : f64}; + * deftype converter :: (point -> polar); + * + * def polar2rect(pt : polar) -> point { + * point(x = pt.mag * cos(arg), + * y = pt.mag * sin(arg)); + * } + * + * Grammar: + * toplevel-program = $toplevel-expression(1); ..; $toplevel-expression(n) + * + * if interactive: + * toplevel-expression = expression + * else + * toplevel-expression = type-decl | define-expr + * + * type-decl = decltype $typename [<$tp1 .. $tpn>] + * expression = type-decl + * | define-expr + * | literal-expr + * | variable-expr + * | apply-expr + * | if-expr + * | lambda-expr + * | arithmetic-expr + * | block + * + * define-expr = type-decl + * | type-def + * | variable-def + * | function-decl + * | function-def + * + * type-def = deftype $typename [<$tp1 .. $tpn>] :: type-def-rhs + * type-def-rhs = object + * | bool + * | i128 | i64 | i32 | i16 | i8 + * | f128 | f64 | f32 | f16 + * | struct $typename { ($membername(i) : $typename(i))* } + * [end $typename] + * | tuple $typename { $typename(1), .., $typename(n) } + * [end $typename] + * | copytype $typename + * | subtype $typename { ($member(i) : $typename(i))* } + * + * variable-def = decl $varname [: $typename] [= expression] + * function-decl = decl $functionname($varname(1) : $typename(1), + * .., + * $varname(n) : $typename(n)) -> $typename[ret] + * function-def = def $functionname($varname(1) : $typename(1), + * .., + * $varname(n) : $typename(n)) [-> $typename[ret]] + * body-expr + * [ end $functionname ] + * literal-expr = boolean-literal + * | integer-literal + * | fp-literal + * | string-literal + * | symbol-literal + * | struct-literal + * + * + * boolean-literal = true | false + * + * variable-expr = $varname + * apply-expr = fn-expr(arg-expr(1), .., arg-expr(n)) + * fn-expr = expression + * arg-expr(i) = expression + * + * if-expr = if (test-expr) then-block else else-block + * | ((test-expr) ? then-expr : else-expr) + * test-expr = expression + * then-block = block + * else-block = block + * + * block = { (definition | expression)* } + * + * lambda-expr = lambda ($paramname(1) : $type(1), + * .., + * $paramname(n) : $type(n)) body-expr + * body-expr = expression + * + * arithmetic-expr = expression binop expression + * + * binop = + + * | - + * | * + * | / + * | | + * | & + * | ^ + * | == + * | != + * | < + * | <= + * | => + * | > + * + **/ + class SchematikaParser { + public: + using ArenaConfig = xo::mm::ArenaConfig; + using token_type = Token; + + public: + /** create parser in initial state; + * parser is ready to receive tokens via @ref include_token + * + * @p config arena configuration for parser memory + * @p debug_flag true to enable debug logging + **/ + SchematikaParser(const ArenaConfig & config, bool debug_flag); + + bool debug_flag() const { return debug_flag_; } + + /** true if parser is at top-level, + * i.e. ready for next top-level expression + **/ + bool is_at_toplevel() const; + + /** true iff parser contains state for an incomplete expression. + * For this to be true, parser must have consumed at least one token + * since end of last toplevel expression + **/ + bool has_incomplete_expr() const; + + /** put parser into state for beginning an interactive session. + **/ + void begin_interactive_session(); + + /** put parser into state for beginning of a translation unit + * (i.e. input stream) + **/ + void begin_translation_unit(); + + /** include next token @p tk and increment parser state. + * + * @param tk next input token + * @return parsed expression, if @p tk completes an expression. + * otherwise nullptr + **/ + const ParserResult & include_token(const token_type & tk); + + /** reset parsed result expression; use using return value from + * @ref include_token. Complicating api here to avoid copying ParserResult + * on each token + **/ + void reset_result(); + + /** reset to starting parsing state. + * use this after encountering an error, to avoid cascade of + * spurious secondary errors. particularly important when + * invoked as part of a REPL. + **/ + void reset_to_idle_toplevel(); + + /** print human-readable representation on stream @p os **/ + void print(std::ostream & os) const; + + private: + /** state machine **/ + ParserStateMachine psm_; + + /** debug flag (also stored in psm_) **/ + bool debug_flag_ = false; + }; /*SchematikaParser*/ + + inline std::ostream & + operator<< (std::ostream & os, + const SchematikaParser & x) { + x.print(os); + return os; + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end SchematikaParser.hpp */ diff --git a/src/reader2/CMakeLists.txt b/src/reader2/CMakeLists.txt index f6fc817f..f3eaace2 100644 --- a/src/reader2/CMakeLists.txt +++ b/src/reader2/CMakeLists.txt @@ -4,6 +4,7 @@ set(SELF_LIB xo_reader2) set(SELF_SRCS init_reader2.cpp + SchematikaParser.cpp ParserStateMachine.cpp ParserStack.cpp ParserResult.cpp diff --git a/src/reader2/ParserStateMachine.cpp b/src/reader2/ParserStateMachine.cpp index a691bb57..c2297da4 100644 --- a/src/reader2/ParserStateMachine.cpp +++ b/src/reader2/ParserStateMachine.cpp @@ -16,6 +16,13 @@ namespace xo { using xo::facet::with_facet; namespace scm { + ParserStateMachine::ParserStateMachine(const ArenaConfig & config) + : parser_alloc_{DArena::map(config)}, + expr_alloc_{with_facet::mkobj(&parser_alloc_)}, + debug_flag_{config.debug_flag_} + { + } + void ParserStateMachine::push_ssm(obj ssm) { diff --git a/src/reader2/SchematikaParser.cpp b/src/reader2/SchematikaParser.cpp new file mode 100644 index 00000000..712c55d8 --- /dev/null +++ b/src/reader2/SchematikaParser.cpp @@ -0,0 +1,92 @@ +/** @file SchematikaParser.cpp + * + * @author Roland Conybeare, Jan 2026 + **/ + +#include "SchematikaParser.hpp" +#include "ParserStateMachine.hpp" +#include "ParserStack.hpp" +#include "DExprSeqState.hpp" +#include +#include + +namespace xo { + using xo::tostr; + using xo::xtag; + + namespace scm { + // ----- SchematikaParser ----- + + SchematikaParser::SchematikaParser(const ArenaConfig & config, bool debug_flag) + : psm_{config}, + debug_flag_{debug_flag} + { + } + + bool + SchematikaParser::is_at_toplevel() const { + return psm_.stack() == nullptr; + } + + bool + SchematikaParser::has_incomplete_expr() const { + /* (don't count toplevel exprseq) */ + ParserStack * stack = psm_.stack(); + if (!stack) + return false; + return stack->parent() != nullptr; + } + + void + SchematikaParser::begin_interactive_session() { + DExprSeqState::start_interactive(psm_.expr_alloc(), &psm_); + } + + void + SchematikaParser::begin_translation_unit() { + DExprSeqState::start_batch(psm_.expr_alloc(), &psm_); + } + + const ParserResult & + SchematikaParser::include_token(const token_type & tk) + { + scope log(XO_DEBUG(debug_flag_), xtag("tk", tk)); + + if (psm_.stack() == nullptr) { + throw std::runtime_error(tostr("SchematikaParser::include_token", + ": parser not expecting input" + "(call parser.begin_translation_unit()..?)", + xtag("token", tk))); + } + + /* stack is non-empty */ + + psm_.on_token(tk); + + return psm_.result(); + } /*include_token*/ + + void + SchematikaParser::reset_result() + { + psm_.reset_result(); + } + + void + SchematikaParser::reset_to_idle_toplevel() + { + psm_.reset_stack(); + psm_.reset_result(); + } /*reset_to_idle_toplevel*/ + + void + SchematikaParser::print(std::ostream & os) const { + os << "" << std::endl; + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end SchematikaParser.cpp */