commit 5d2ee35fe67825b259642f5ee65408f4b0621f1f
Author: Roland Conybeare
Date:   Wed Jul 31 23:37:51 2024 +1000

    parser: initial implementation [wip - only handles 'def' expr

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..3d3a7826
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+# emacs workspace config
+.projectile
+# clangd working space (see emacs+lsp)
+.cache
+# typical cmake build directory (source-tree-nephew)
+.build*
+# symlink to builddir/compile_commands.json; should be set manually in dev sandbox
+compile_commands.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..84eccb39
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,27 @@
+# xo-parser/CMakeLists.txt
+#
+# Top-level build script: bootstraps the xo cmake macros, then builds the
+# parser library (src/parser) and its unit tests (utest).
+
+cmake_minimum_required(VERSION 3.10)
+
+project(xo_parser VERSION 0.1)
+
+include(GNUInstallDirs)
+# locates xo-cmake-config and includes xo_macros/xo_cxx (see that file)
+include(cmake/xo-bootstrap-macros.cmake)
+
+xo_cxx_toplevel_options3()
+
+# ----------------------------------------------------------------
+# c++ settings
+
+# project-wide extra compiler flags; empty by default
+set(PROJECT_CXX_FLAGS "")
+#set(PROJECT_CXX_FLAGS "-fconcepts-diagnostics-depth=2")
+add_definitions(${PROJECT_CXX_FLAGS})
+
+# ----------------------------------------------------------------
+
+add_subdirectory(src/parser)
+add_subdirectory(utest)
+
+# ----------------------------------------------------------------
+# provide find_package() support
+
+xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
diff --git a/cmake/xo-bootstrap-macros.cmake b/cmake/xo-bootstrap-macros.cmake
new file mode 100644
index 00000000..aba31169
--- /dev/null
+++ b/cmake/xo-bootstrap-macros.cmake
@@ -0,0 +1,35 @@
+# ----------------------------------------------------------------
+# Example configure invocation:
+#   $ PREFIX=/usr/local  # for example
+#   $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
+#
+# When CMAKE_MODULE_PATH is empty or the literal word 'prefix'
+# (and XO_SUBMODULE_BUILD is not set), CMAKE_MODULE_PATH is taken from
+#   xo-cmake-config --cmake-module-path
+#
+# The xo .cmake macros are then expected in
+#   CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
+# 
----------------------------------------------------------------
+
+# Locate the xo-cmake-config helper.
+# NOTE: the REQUIRED option is only recognised by CMake >= 3.18; the explicit
+# check below covers older versions.
+find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)
+
+# find_program() stores "<VAR>-NOTFOUND" on failure, which if() treats as false.
+if (NOT XO_CMAKE_CONFIG_EXECUTABLE)
+    # FATAL_ERROR (not FATAL) is the mode keyword that stops configuration
+    message(FATAL_ERROR "could not find xo-cmake-config executable")
+endif()
+
+message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}")
+
+if (NOT XO_SUBMODULE_BUILD)
+    if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix))
+        # default to typical install location for xo-project-macros.
+        # strip the trailing newline so the value is usable as a path
+        execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path
+                        OUTPUT_VARIABLE CMAKE_MODULE_PATH
+                        OUTPUT_STRIP_TRAILING_WHITESPACE)
+        message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
+    endif()
+endif()
+
+# needs to have been installed somewhere on CMAKE_MODULE_PATH,
+# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
+#
+include(xo_macros/xo_cxx)
+
+xo_cxx_bootstrap_message()
diff --git a/cmake/xo_parserConfig.cmake.in b/cmake/xo_parserConfig.cmake.in
new file mode 100644
index 00000000..6eadeb07
--- /dev/null
+++ b/cmake/xo_parserConfig.cmake.in
@@ -0,0 +1,8 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+find_dependency(xo_expression)
+find_dependency(xo_tokenizer)
+#find_dependency(subsys)
+include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
+check_required_components("@PROJECT_NAME@")
diff --git a/include/xo/parser/parser.hpp b/include/xo/parser/parser.hpp
new file mode 100644
index 00000000..92b8e046
--- /dev/null
+++ b/include/xo/parser/parser.hpp
@@ -0,0 +1,454 @@
+/* file parser.hpp
+ *
+ * author: Roland Conybeare, Jul 2024
+ */
+
+#pragma once
+
+#include "xo/expression/Expression.hpp"
+#include "xo/tokenizer/token.hpp"
+/* NOTE(review): the original header names on the next lines were lost;
+ * these are the standard headers the declarations below rely on.
+ */
+#include <cstddef>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace xo {
+    namespace scm {
+        // ----- exprir -----
+
+        /** discriminator for the payload carried by an @ref exprir **/
+        enum class exprirtype {
+            /** sentinel: default-constructed / not a valid IR **/
+            invalid = -1,
+
+            /** no payload **/
+            empty,
+            /** payload is a symbol name, see exprir::symbol_name() **/
+            symbol,
+            /** payload is a completed expression, see exprir::expr() **/
+            expression,
+
+            /** not an IR type: comes last, counts the entries above **/
+            n_exprirtype
+        };
+
+        /** @return static string naming @p x **/
+        extern const char *
+        exprirtype_descr(exprirtype x);
+
+        /** print exprirtype_descr(x) on @p os **/
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    exprirtype x)
+        {
+            os << exprirtype_descr(x);
+            return os;
+        }
+
+        /** intermediate representation for some part of an expression
+         *
+         * Examples:
+         * 1. a variable name (but without type information)
+         **/
+        class exprir {
+        public:
+            using Expression = xo::ast::Expression;
+
+        public:
+            /** create IR with type exprirtype::invalid and no payload **/
+            exprir() = default;
+            /** create IR carrying symbol name @p x **/
+            exprir(exprirtype xir_type,
+                   const std::string & x)
+                : xir_type_{xir_type}, symbol_name_{x} {}
+            /** create IR carrying expression @p expr.
+             *  NOTE(review): template argument restored from the
+             *  Expression alias above -- confirm.
+             **/
+            exprir(exprirtype xir_type,
+                   rp<Expression> expr)
+                : xir_type_{xir_type}, expr_{std::move(expr)} {}
+
+            exprirtype xir_type() const { return xir_type_; }
+            const std::string & symbol_name() const { return symbol_name_; }
+            const rp<Expression> & expr() const { return expr_; }
+
+            /** print human-readable representation on @p os **/
+            void print(std::ostream & os) const;
+
+        private:
+            /** IR type code **/
+            exprirtype xir_type_ = exprirtype::invalid;
+            /** xir_type=symbol: a symbol (type or variable) name **/
+            std::string symbol_name_;
+            /** xir_type=expression: a completed expression **/
+            rp<Expression> expr_;
+        };
+
+        inline std::ostream &
+        operator<< (std::ostream & os, const exprir & x) {
+            x.print(os);
+            return os;
+        }
+
+        /** identifies the parsing state held by an exprstate.
+         *  The def_0 .. def_4 progression is diagrammed in class exprstate.
+         **/
+        enum class exprstatetype {
+            invalid = -1,
+
+            /** toplevel of some translation unit **/
+            expect_toplevel_expression_sequence,
+
+            def_0,
+            def_1,
+            def_2,
+            def_3,
+            def_4,
+
+            expect_rhs_expression,
+            expect_symbol,
+
+            /** not a state: comes last, counts the entries above **/
+            n_exprstatetype
+        };
+
+        /** @return static string naming @p x **/
+        extern const char *
+        exprstatetype_descr(exprstatetype x);
+
+        inline std::ostream &
+        operator<< (std::ostream & os, exprstatetype x) {
+            os << exprstatetype_descr(x);
+            return os;
+        }
+
+        /** instruction from an exprstate to the parser that owns the stack;
+         *  meanings are listed on expraction::action_type_
+         **/
+        enum class expractiontype {
+            invalid = -1,
+
+            push1,
+            push2,
+            keep,
+            emit,
+            pop,
+
+            /** not an action: comes last, counts the entries above **/
+            n_expractiontype
+        };
+
+        /** @return static string naming @p x **/
+        extern const char *
+        expractiontype_descr(expractiontype x);
+
+        inline std::ostream &
+        operator<< (std::ostream & os, expractiontype x) {
+            os << expractiontype_descr(x);
+            return os;
+        }
+
+        /** an action associated with parser response to an incoming lexical token
+         **/
+        class expraction {
+        public:
+            /** create action with type expractiontype::invalid **/
+            expraction() = default;
+            expraction(expractiontype action_type,
+                       const exprir & expr_ir,
+                       exprstatetype push_exs1,
+                       exprstatetype push_exs2)
+                : action_type_{action_type}, expr_ir_{expr_ir},
+                  push_exs1_{push_exs1}, push_exs2_{push_exs2}
+            {}
+
+            /** action: leave parser stack as it is **/
+            static expraction keep();
+            /** action: report expression carried by @p ir to parser's caller **/
+            static expraction emit(const exprir & ir);
+            /** action: push state @p s1, then state @p s2 **/
+            static expraction push2(exprstatetype s1, exprstatetype s2);
+
+            expractiontype action_type() const { return action_type_; }
+            const exprir & expr_ir() const { return expr_ir_; }
+            exprstatetype push_exs1() const { return push_exs1_; }
+            exprstatetype push_exs2() const { return push_exs2_; }
+
+            /** print human-readable representation on @p os **/
+            void print(std::ostream & os) const;
+
+        private:
+            /**
+             * push1: push new exprstate built from push_exs1_
+             * push2: push new exprstate built from push_exs1_,
+             *        followed by push_exs2_
+             * keep: keep current exprstate (which will have updated inplace)
+             * emit: keep current exprstate, report expression in exprir
+             *       to caller of parser::include_token
+             * pop: drop exprstate, report exprir to parent
+             **/
+            expractiontype action_type_ = expractiontype::invalid;
+            /**
+             * intermediate representation (pass to enclosing stack state)
+             **/
+            exprir expr_ir_;
+            /** with action_type push1 or push2,
+             * parser will push exprstate with this type
+             **/
+            exprstatetype push_exs1_ = exprstatetype::invalid;
+            /** with action_type push2,
+             * parser will push exprstate with this type
+             * (after pushing exprstate built from push_exs1_)
+             **/
+            exprstatetype push_exs2_ = exprstatetype::invalid;
+        };
+
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const expraction & x)
+        {
+            x.print(os);
+            return os;
+        }
+
+        /** state associated with a partially-parsed expression. 
+         *
+         *  One exprstate occupies one slot of the parser stack.
+         *  It reacts to tokens (on_input) and to results reported by nested
+         *  states (on_exprir), answering with an expraction for the parser.
+         **/
+        class exprstate {
+        public:
+            using exprtype = xo::ast::exprtype;
+            using token_type = token;
+
+        public:
+            /** create state with type exprstatetype::invalid **/
+            exprstate() = default;
+            /** deliberately not explicit: the parser pushes states directly
+             *  from exprstatetype values
+             **/
+            exprstate(exprstatetype exs_type) : exs_type_{exs_type} {}
+
+            static exprstate expect_toplevel_expression_sequence() {
+                return exprstate(exprstatetype::expect_toplevel_expression_sequence);
+            }
+            static exprstate def_0() {
+                return exprstate(exprstatetype::def_0);
+            }
+            static exprstate expect_symbol() {
+                return exprstate(exprstatetype::expect_symbol);
+            }
+
+            exprstatetype exs_type() const { return exs_type_; }
+
+            /** true iff this parsing state admits a 'def' keyword
+             *  as next token
+             **/
+            bool admits_definition() const;
+            /** true iff this parsing state admits a symbol as next token **/
+            bool admits_symbol() const;
+            /** true iff this parsing state admits a colon as next token **/
+            bool admits_colon() const;
+            /** true iff this parsing state admits a singleassign '=' as next token **/
+            bool admits_singleassign() const;
+            /** true iff this parsing state admits a 64-bit floating point literal token **/
+            bool admits_f64() const;
+
+            /** update exprstate in response to incoming token @p tk,
+             *  forward instructions to parent parser
+             *
+             *  @throws std::runtime_error if this state does not admit @p tk
+             **/
+            expraction on_input(const token_type & tk);
+            /** update exprstate in response to IR (intermediate representation)
+             *  from nested parsing task
+             **/
+            expraction on_exprir(const exprir & ir);
+
+            /** print human-readable representation on @p os **/
+            void print(std::ostream & os) const;
+
+        private:
+            /* per-token handlers, dispatched from on_input() */
+            expraction on_def();
+            expraction on_symbol(const token_type & tk);
+            expraction on_colon();
+            expraction on_singleassign();
+            expraction on_f64(const token_type & tk);
+
+        private:
+            /**
+             *   def foo : f64 = 1
+             *  ^   ^   ^ ^   ^ ^ ^
+             *  |   |   | |   | | (done)
+             *  |   |   | |   | def_4:expect_rhs_expression
+             *  |   |   | |   def_3
+             *  |   |   | def_2:expect_symbol
+             *  |   |   def_1
+             *  |   def_0:expect_symbol
+             *  expect_toplevel_expression_sequence
+             *
+             *  def_0:expect_symbol: got 'def' keyword, symbol to follow
+             *  def_1: got symbol name
+             *  def_2:expect_symbol got (optional) colon, type name to follow
+             *  def_3: got symbol type
+             *  def_4:expect_rhs_expression got (optional) equal sign, value to follow
+             *  (done): definition complete, pop exprstate from stack
+             *
+             *  Defaults to invalid (like exprir / expraction) so that a
+             *  default-constructed exprstate never holds an indeterminate value.
+             **/
+            exprstatetype exs_type_ = exprstatetype::invalid;
+
+            /** e.g. foo in
+             *    def foo : f64 = 1
+             **/
+            std::string def_lhs_symbol_;
+            /** e.g. f64 in
+             *    def foo : f64 = 1
+             **/
+            std::string def_lhs_type_;
+        }; /*exprstate*/
+
+        inline std::ostream &
+        operator<< (std::ostream & os, const exprstate & x) {
+            x.print(os);
+            return os;
+        }
+
+        /** schematica parser
+         *
+         * Examples:
+         *
+         *   decltype point
+         *
+         *   // forward declarations
+         *   decl pi : f64
+         *   decl fib(n : i32) -> i32
+         *
+         *   def pi = 3.14159265 // constant. = is single assignment
+         *
+         *   def fib(n : i32) -> i32 {
+         *       // nested defs ok
+         *       def aux(n : i32, s1 : i32, s2 : i32) -> i32 {
+         *           // or:
+         *           //   (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
+         *           //
+         *           if (n == 0) {
+         *               s1
+         *           } else {
+         *               aux(n - 1, s1 + s2, s1)
+         *           }
+         *
+         *           // or:
+         *           //   if (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
+         *       }
+         *
+         *       aux(n=n, s1=1, s2=0)
+         *   }
+         *
+         *   def anotherfib = lambda(n : i32) { fib(n) }
+         *
+         *   def any : object
+         *   def l : list = '()
+         *
+         *   deftype point :: {x : f64, y : f64}
+         *   deftype polar :: {arg : f64, mag : f64}
+         *
+         *   def polar2rect(pt : polar) -> point {
+         *       point(x = pt.mag * cos(pt.arg),
+         *             y = pt.mag * sin(pt.arg))
+         *   }
+         *
+         * Grammar:
+         *   toplevel-program = expression*
+         *   type-decl = decltype $typename [<$tp1 .. $tpn>]
+         *   expression = define-expr
+         *              | literal-expr
+         *              | variable-expr
+         *              | apply-expr
+         *              | if-expr
+         *              | lambda-expr
+         *              | block
+         *
+         *   define-expr = type-decl
+         *               | type-def
+         *               | variable-def
+         *               | function-decl
+         *               | function-def
+         *
+         *   type-def = deftype $typename [<$tp1 .. 
$tpn>] :: type-def-rhs
+         *   type-def-rhs = object
+         *                | bool
+         *                | i128 | i64 | i32 | i16 | i8
+         *                | f128 | f64 | f32 | f16
+         *                | struct $typename { ($membername(i) : $typename(i))* }
+         *                  [end $typename]
+         *                | tuple $typename { $typename(1), .., $typename(n) }
+         *                  [end $typename]
+         *                | copytype $typename
+         *                | subtype $typename { ($member(i) : $typename(i))* }
+         *
+         *   variable-def = def $varname [: $typename] [= expression]
+         *   function-decl = decl $functionname($varname(1) : $typename(1),
+         *                                      ..,
+         *                                      $varname(n) : $typename(n)) -> $typename[ret]
+         *   function-def = def $functionname($varname(1) : $typename(1),
+         *                                    ..,
+         *                                    $varname(n) : $typename(n)) [-> $typename[ret]]
+         *                  body-expr
+         *                  [ end $functionname ]
+         *   literal-expr = integer-literal
+         *                | fp-literal
+         *                | string-literal
+         *                | symbol-literal
+         *                | struct-literal
+         *
+         *   variable-expr = $varname
+         *   apply-expr = fn-expr(arg-expr(1), .., arg-expr(n))
+         *   fn-expr = expression
+         *   arg-expr(i) = expression
+         *
+         *   if-expr = if (test-expr) then-block else else-block
+         *           | (test-expr) ? 
then-expr : else-expr
+         *   test-expr = expression
+         *   then-block = block
+         *   else-block = block
+         *
+         *   block = { (definition | expression)* }
+         *
+         *   lambda-expr = lambda ($paramname(1) : $type(1),
+         *                         ..,
+         *                         $paramname(n) : $type(n)) body-expr
+         *   body-expr = expression
+         **/
+        class parser {
+        public:
+            using Expression = xo::ast::Expression;
+            using token_type = exprstate::token_type; // token;
+
+        public:
+            /** create parser with empty stack.
+             *  Call @ref begin_translation_unit before the first
+             *  @ref include_token; include_token throws on an empty stack.
+             **/
+            parser() = default;
+
+            /** for diagnostics: number of entries in parser stack **/
+            std::size_t stack_size() const { return stack_.size(); }
+            /** for diagnostics: exprstatetype at level @p i
+             *  (taken relative to top of stack)
+             *
+             *  @pre 0 <= i < stack_size
+             *  @return exprstatetype::invalid if @p i is out of bounds
+             **/
+            exprstatetype i_exstype(std::size_t i) const {
+                std::size_t z = stack_.size();
+
+                if (i < z) {
+                    return stack_[(z - 1) - i].exs_type();
+                }
+
+                /* out of bounds */
+                return exprstatetype::invalid;
+            }
+
+            /** put parser into state for beginning of a translation unit
+             *  (i.e. input stream)
+             **/
+            void begin_translation_unit();
+
+            /** include next token @p tk and increment parser state.
+             *
+             *  @param tk  next input token
+             *  @return parsed expression, if @p tk completes an expression.
+             *          otherwise nullptr
+             *  @throws std::runtime_error if the stack is empty, or if the
+             *          current state does not admit @p tk
+             **/
+            rp<Expression> include_token(const token_type & tk);
+
+            /** print human-readable representation on stream @p os **/
+            void print(std::ostream & os) const;
+
+        private:
+            /** @throws std::runtime_error if stack is empty **/
+            exprstate & top_exprstate();
+            void push_exprstate(const exprstate & exs);
+            /** no-op if stack is empty **/
+            void pop_exprstate();
+
+        private:
+            /** stack recording state associated with enclosing expressions.
+             *
+             *  Note: at least as of c++23, the std::stack api doesn't support access
+             *  to members other than the top.
+             *
+             *  for stack with N elements (N = stack_.size()):
+             *  - bottom of stack is stack_[0]
+             *  - top of stack is stack_[N-1]
+             **/
+            std::vector<exprstate> stack_;
+
+        }; /*parser*/
+
+        inline std::ostream &
+        operator<< (std::ostream & os,
+                    const parser & x) {
+            x.print(os);
+            return os;
+        }
+
+    } /*namespace scm*/
+} /*namespace xo*/
+
+/* end parser.hpp */
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
new file mode 100644
index 00000000..2af43636
--- /dev/null
+++ b/src/parser/CMakeLists.txt
@@ -0,0 +1,11 @@
+# parser/CMakeLists.txt
+
+set(SELF_LIB xo_parser)
+set(SELF_SRCS
+    parser.cpp)
+
+xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
+xo_dependency(${SELF_LIB} xo_expression)
+xo_dependency(${SELF_LIB} xo_tokenizer)
+
+# end CMakeLists.txt
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp
new file mode 100644
index 00000000..ab6404fd
--- /dev/null
+++ b/src/parser/parser.cpp
@@ -0,0 +1,690 @@
+/* file parser.cpp
+ *
+ * author: Roland Conybeare
+ */
+
+#include "parser.hpp"
+#include "xo/expression/DefineExpr.hpp"
+#include "xo/expression/Constant.hpp"
+/* NOTE(review): the original header names on the next lines were lost;
+ * these are the standard headers used directly below
+ * (assert, std::ostream / std::endl, std::runtime_error).
+ */
+#include <cassert>
+#include <ostream>
+#include <stdexcept>
+
+namespace xo {
+    using xo::ast::Expression;
+    using xo::ast::DefineExpr;
+    using xo::ast::Constant;
+
+    namespace scm {
+        /* static name for each exprirtype; "???exprirtype" for anything else */
+        const char *
+        exprirtype_descr(exprirtype x) {
+            switch(x) {
+            case exprirtype::invalid:
+                return "?invalid";
+            case exprirtype::empty:
+                return "empty";
+            case exprirtype::symbol:
+                return "symbol";
+            case exprirtype::expression:
+                return "expression";
+            case exprirtype::n_exprirtype:
+                break;
+            }
+
+            return "???exprirtype";
+        }
+
+        void
+        exprir::print(std::ostream & os) const {
+            /* NOTE(review): the streamed text appears to have been lost
+             * from this patch -- restore from the repository
+             */
+            os << "";
+        }
+
+        /* static name for each exprstatetype; "???" for anything else */
+        const char *
+        exprstatetype_descr(exprstatetype x) {
+            switch(x) {
+            case exprstatetype::invalid:
+                return "?invalid";
+            case exprstatetype::expect_toplevel_expression_sequence:
+                return "expect_toplevel_expression_sequence";
+            case exprstatetype::def_0:
+                return "def_0";
+            case exprstatetype::def_1:
+                return "def_1";
+            case exprstatetype::def_2:
+                return "def_2";
+            case exprstatetype::def_3:
+                return "def_3";
+            case exprstatetype::def_4:
+                return "def_4";
+            case exprstatetype::expect_rhs_expression:
+                return "expect_rhs_expression";
+            case exprstatetype::expect_symbol:
+                return "expect_symbol";
+            case exprstatetype::n_exprstatetype:
+                break;
+            }
+
+            return "???";
+        }
+
+        /* static name for each expractiontype; "???" for anything else */
+        const char *
+        expractiontype_descr(expractiontype x) {
+            switch(x) {
+            case expractiontype::invalid:
+                return "?invalid";
+            case expractiontype::push1:
+                return "push1";
+            case expractiontype::push2:
+                return "push2";
+            case expractiontype::keep:
+                return "keep";
+            case expractiontype::emit:
+                return "emit";
+            case expractiontype::pop:
+                return "pop";
+            case expractiontype::n_expractiontype:
+                break;
+            }
+
+            return "???";
+        }
+
+        expraction
+        expraction::keep() {
+            return expraction(expractiontype::keep,
+                              exprir(),
+                              exprstatetype::invalid /*not used*/,
+                              exprstatetype::invalid /*not used*/);
+        }
+
+        expraction
+        expraction::emit(const exprir & ir) {
+            return expraction(expractiontype::emit,
+                              ir,
+                              exprstatetype::invalid /*not used*/,
+                              exprstatetype::invalid /*not used*/);
+        }
+
+        expraction
+        expraction::push2(exprstatetype s1,
+                          exprstatetype s2) {
+            return expraction(expractiontype::push2,
+                              exprir(),
+                              s1,
+                              s2);
+        }
+
+        void
+        expraction::print(std::ostream & os) const {
+            /* NOTE(review): the streamed text appears to have been lost
+             * from this patch -- restore from the repository
+             */
+            os << "";
+        }
+
+        /* 'def' is admitted only at toplevel */
+        bool
+        exprstate::admits_definition() const {
+            switch(exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+                return true;
+
+            case exprstatetype::def_0:
+            case exprstatetype::def_1:
+            case exprstatetype::def_2:
+            case exprstatetype::def_3:
+            case exprstatetype::def_4:
+                /* note for def_4:
+                 *   rhs could certainly be a function body that contains
+                 *   nested defines; but then immediately-enclosing-exprstate
+                 *   would be a block
+                 */
+                return false;
+            case exprstatetype::expect_rhs_expression:
+                return false;
+            case exprstatetype::expect_symbol:
+                return false;
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                return false;
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            return false;
+        }
+
+        /* a symbol is admitted where a symbol or a rhs expression is expected */
+        bool
+        exprstate::admits_symbol() const {
+            switch(exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+            case exprstatetype::def_0:
+            case exprstatetype::def_1:
+            case exprstatetype::def_2:
+            case exprstatetype::def_3:
+            case exprstatetype::def_4:
+                return false;
+
+            case exprstatetype::expect_rhs_expression:
+                /* treat symbol as variable name */
+                return true;
+
+            case exprstatetype::expect_symbol:
+                return true;
+
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                return false;
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            return false;
+        }
+
+        /* a colon is admitted only in def_1 (after the defined name) */
+        bool
+        exprstate::admits_colon() const {
+            switch(exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+            case exprstatetype::def_0:
+                return false;
+
+            case exprstatetype::def_1:
+                return true;
+
+            case exprstatetype::def_2:
+            case exprstatetype::def_3:
+            case exprstatetype::def_4:
+            case exprstatetype::expect_rhs_expression:
+                /* rhs-expressions (or expressions for that matter)
+                 * may not begin with a colon
+                 */
+            case exprstatetype::expect_symbol:
+                return false;
+
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                return false;
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            return false;
+        }
+
+        /* '=' is admitted only in def_3 (after the type name).
+         * NOTE(review): the header describes the ': type' part as optional,
+         * but def_1 does not admit '=' yet -- confirm whether intended (wip)
+         */
+        bool
+        exprstate::admits_singleassign() const {
+            switch(exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+            case exprstatetype::def_0:
+            case exprstatetype::def_1:
+            case exprstatetype::def_2:
+                return false;
+
+            case exprstatetype::def_3:
+                return true;
+
+            case exprstatetype::def_4:
+            case exprstatetype::expect_rhs_expression:
+                /* rhs-expressions (or expressions for that matter)
+                 * may not begin with singleassign '='
+                 */
+            case exprstatetype::expect_symbol:
+                return false;
+
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                return false;
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            return false;
+        }
+
+        /* an f64 literal is admitted only where a rhs expression is expected */
+        bool
+        exprstate::admits_f64() const {
+            switch(exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+            case exprstatetype::def_0:
+            case exprstatetype::def_1:
+            case exprstatetype::def_2:
+            case exprstatetype::def_3:
+            case exprstatetype::def_4:
+                return false;
+
+            case exprstatetype::expect_rhs_expression:
+                return true;
+
+            case exprstatetype::expect_symbol:
+                return false;
+
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                return false;
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            return false;
+        }
+
+        /* 'def' keyword: ask parser to push def_0, then expect_symbol.
+         * throws std::runtime_error unless admits_definition()
+         */
+        expraction
+        exprstate::on_def() {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            constexpr const char * self_name = "exprstate::on_def";
+
+            /* lots of illegal states */
+            if (!this->admits_definition())
+            {
+                throw std::runtime_error(tostr(self_name,
+                                               ": unexpected keyword 'def' for parsing state",
+                                               xtag("state", *this)));
+            }
+
+            /* keyword 'def' introduces a definition:
+             *   def pi : f64 = 3.14159265
+             *   def sq(x : f64) -> f64 { (x * x) }
+             */
+            return expraction::push2(exprstatetype::def_0,
+                                     /* todo: replace:
+                                      *   expect_symbol_or_function_signature()
+                                      */
+                                     exprstatetype::expect_symbol);
+        }
+
+        /* symbol token: pop this state, reporting the symbol text to parent.
+         * throws std::runtime_error unless admits_symbol()
+         */
+        expraction
+        exprstate::on_symbol(const token_type & tk) {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            constexpr const char * self_name = "exprstate::on_symbol";
+
+            if (!this->admits_symbol()) {
+                throw std::runtime_error
+                    (tostr(self_name,
+                           ": unexpected symbol-token for parsing state",
+                           xtag("symbol", tk),
+                           xtag("state", *this)));
+            }
+
+            switch(this->exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+                throw std::runtime_error
+                    (tostr(self_name,
+                           ": unexpected symbol-token at top-level",
+                           " (expecting decl|def)",
+                           xtag("symbol", tk)));
+                break;
+
+            case exprstatetype::def_0:
+            case exprstatetype::def_1:
+            case exprstatetype::def_2:
+            case exprstatetype::def_3:
+            case exprstatetype::def_4:
+                /* unreachable */
+                assert(false);
+                return expraction();
+
+            case exprstatetype::expect_rhs_expression:
+            case exprstatetype::expect_symbol:
+                return expraction(expractiontype::pop,
+                                  exprir(exprirtype::symbol, tk.text()),
+                                  exprstatetype::invalid /*not used*/,
+                                  exprstatetype::invalid /*not used*/);
+
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                assert(false);
+                return expraction();
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            assert(false);
+            return expraction();
+        }
+
+        /* colon: def_1 -> def_2, then ask parser to push expect_symbol
+         * (for the type name).
+         * throws std::runtime_error unless admits_colon()
+         */
+        expraction
+        exprstate::on_colon() {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            constexpr const char * self_name = "exprstate::on_colon";
+
+            /* lots of illegal states */
+            if (!this->admits_colon())
+            {
+                throw std::runtime_error(tostr(self_name,
+                                               ": unexpected colon for parsing state",
+                                               xtag("state", *this)));
+            }
+
+            if (this->exs_type_ == exprstatetype::def_1) {
+                this->exs_type_ = exprstatetype::def_2;
+
+                return expraction(expractiontype::push1,
+                                  exprir(),
+                                  exprstatetype::expect_symbol,
+                                  exprstatetype::invalid /*not used*/);
+            } else {
+                assert(false);
+                return expraction();
+            }
+        }
+
+        /* '=': def_3 -> def_4, then ask parser to push expect_rhs_expression.
+         * throws std::runtime_error unless admits_singleassign()
+         */
+        expraction
+        exprstate::on_singleassign() {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            constexpr const char * self_name = "exprstate::on_singleassign";
+
+            if (!this->admits_singleassign())
+            {
+                throw std::runtime_error(tostr(self_name,
+                                               ": unexpected equals for parsing state",
+                                               xtag("state", *this)));
+            }
+
+            if (this->exs_type_ == exprstatetype::def_3) {
+                this->exs_type_ = exprstatetype::def_4;
+
+                return expraction(expractiontype::push1,
+                                  exprir(),
+                                  exprstatetype::expect_rhs_expression,
+                                  exprstatetype::invalid /*not used*/);
+            } else {
+                assert(false);
+                return expraction();
+            }
+        }
+
+        /* f64 literal: pop this state, reporting a Constant expression to parent.
+         * throws std::runtime_error unless admits_f64()
+         */
+        expraction
+        exprstate::on_f64(const token_type & tk) {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            constexpr const char * self_name = "exprstate::on_f64";
+
+            if (!this->admits_f64())
+            {
+                throw std::runtime_error(tostr(self_name,
+                                               ": unexpected floating-point literal for parsing state",
+                                               xtag("state", *this)));
+            }
+
+            if (this->exs_type_ == exprstatetype::expect_rhs_expression) {
+                return expraction(expractiontype::pop,
+                                  exprir(exprirtype::expression,
+                                         Constant::make(tk.f64_value())),
+                                  exprstatetype::invalid /*not used*/,
+                                  exprstatetype::invalid /*not used*/);
+            } else {
+                assert(false);
+                return expraction();
+            }
+        }
+
+        /* dispatch on token type.
+         * handled so far: def, f64, symbol, colon, singleassign;
+         * every other token type asserts (not implemented)
+         */
+        expraction
+        exprstate::on_input(const token_type & tk) {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+            log && log(xtag("tk", tk));
+            log && log(xtag("state", *this));
+
+            switch(tk.tk_type()) {
+
+            case tokentype::tk_def:
+                return this->on_def();
+
+            case tokentype::tk_i64:
+                assert(false);
+                return expraction();
+
+            case tokentype::tk_f64:
+                return this->on_f64(tk);
+
+            case tokentype::tk_string:
+                assert(false);
+                return expraction();
+
+            case tokentype::tk_symbol:
+                return this->on_symbol(tk);
+
+            case tokentype::tk_leftparen:
+
+            case tokentype::tk_rightparen:
+            case tokentype::tk_leftbracket:
+            case tokentype::tk_rightbracket:
+            case tokentype::tk_leftbrace:
+            case tokentype::tk_rightbrace:
+
+            case tokentype::tk_leftangle:
+            case tokentype::tk_rightangle:
+            case tokentype::tk_dot:
+            case tokentype::tk_comma:
+                assert(false);
+                return expraction();
+
+            case tokentype::tk_colon:
+                return this->on_colon();
+
+            case tokentype::tk_doublecolon:
+            case tokentype::tk_semicolon:
+                assert(false);
+                return expraction();
+
+            case tokentype::tk_singleassign:
+                return this->on_singleassign();
+
+            case tokentype::tk_assign:
+            case tokentype::tk_yields:
+
+            case tokentype::tk_type:
+            case tokentype::tk_lambda:
+            case tokentype::tk_if:
+            case tokentype::tk_let:
+
+            case tokentype::tk_in:
+            case tokentype::tk_end:
+                assert(false);
+                return expraction();
+
+            case tokentype::tk_invalid:
+            case tokentype::n_tokentype:
+                assert(false);
+                return expraction();
+            }
+
+            assert(false);
+            return expraction();
+        }
+
+        /* receive IR from the nested state that was just popped:
+         *   toplevel: emit a completed expression
+         *   def_0:    record defined name, -> def_1
+         *   def_2:    record type name, -> def_3
+         *   def_4:    build DefineExpr from name + rhs, pop with it
+         */
+        expraction
+        exprstate::on_exprir(const exprir & ir) {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+            log && log(xtag("ir", ir));
+            log && log(xtag("state", *this));
+
+            switch(this->exs_type_) {
+            case exprstatetype::expect_toplevel_expression_sequence:
+                /* toplevel expression sequence accepts an
+                 * arbitrary number of expressions.
+                 *
+                 * parser::include_token() returns
+                 */
+
+                if (ir.xir_type() == exprirtype::expression)
+                    return expraction::emit(ir);
+
+                /* NOT IMPLEMENTED */
+                assert(false);
+                return expraction();
+            case exprstatetype::def_0:
+                this->exs_type_ = exprstatetype::def_1;
+                this->def_lhs_symbol_ = ir.symbol_name();
+
+                return expraction::keep();
+            case exprstatetype::def_1:
+                /* NOT IMPLEMENTED */
+                assert(false);
+                return expraction();
+            case exprstatetype::def_2:
+                this->exs_type_ = exprstatetype::def_3;
+                this->def_lhs_type_ = ir.symbol_name();
+
+                return expraction::keep();
+            case exprstatetype::def_3:
+                /* NOT IMPLEMENTED */
+                assert(false);
+                return expraction();
+            case exprstatetype::def_4:
+                /* have all the ingredients to create an expression
+                 * representing a definition
+                 *
+                 * 1. if ir_type is a symbol, interpret as variable name.
+                 *    Need to be able to locate variable by type
+                 * 2. if ir_type is an expression, adopt as rhs
+                 */
+                if (ir.xir_type() == exprirtype::expression) {
+                    /* TODO: do something with def_lhs_type */
+
+                    rp<Expression> rhs_value = ir.expr();
+                    /* auto: exact pointer type returned by make() is not
+                     * visible here
+                     */
+                    auto def
+                        = DefineExpr::make(this->def_lhs_symbol_,
+                                           rhs_value);
+
+                    return expraction(expractiontype::pop,
+                                      exprir(exprirtype::expression, def),
+                                      exprstatetype::invalid /*not used*/,
+                                      exprstatetype::invalid /*not used*/);
+                } else {
+                    assert(false);
+                    return expraction();
+                }
+
+            case exprstatetype::expect_rhs_expression:
+            case exprstatetype::expect_symbol:
+                /* unreachable
+                 * (this exprstate issues pop instruction from exprstate::on_input()
+                 */
+                assert(false);
+                return expraction();
+            case exprstatetype::invalid:
+            case exprstatetype::n_exprstatetype:
+                /* unreachable */
+                assert(false);
+                return expraction();
+            }
+
+            /* exs_type_ outside the enumeration: don't flow off the end */
+            assert(false);
+            return expraction();
+        }
+
+        void
+        exprstate::print(std::ostream & os) const {
+            /* NOTE(review): the streamed text appears to have been lost
+             * from this patch -- restore from the repository
+             */
+            os << "";
+        }
+
+        // ----- parser -----
+
+        exprstate &
+        parser::top_exprstate() {
+            if (stack_.empty()) {
+                throw std::runtime_error
+                    ("parser::top_exprstate: unexpected empty stack");
+            }
+
+            return stack_.back();
+        }
+
+        void
+        parser::push_exprstate(const exprstate & exs) {
+            stack_.push_back(exs);
+        }
+
+        void
+        parser::pop_exprstate() {
+            /* popping an empty stack is a no-op */
+            if (!stack_.empty())
+                stack_.pop_back();
+        }
+
+        void
+        parser::begin_translation_unit() {
+            this->push_exprstate
+                (exprstate::expect_toplevel_expression_sequence());
+        }
+
+        rp<Expression>
+        parser::include_token(const token_type & tk)
+        {
+            constexpr bool c_debug_flag = true;
+            scope log(XO_DEBUG(c_debug_flag));
+
+            if (stack_.empty()) {
+                throw std::runtime_error(tostr("parser::include_token",
+                                               ": parser not expecting input"
+                                               " (call parser.begin_translation_unit()..?)",
+                                               xtag("token", tk)));
+            }
+
+            /* stack_ is non-empty */
+            expraction action = this->top_exprstate().on_input(tk);
+
+            /* loop until reach parsing state that requires more input.
+             * only 'pop' iterates: the popped state's IR is handed to the
+             * new top of stack, which answers with the next action
+             */
+            for (;;) {
+                log && log(xtag("action", action));
+
+                switch(action.action_type()) {
+                case expractiontype::keep:
+                    return nullptr;
+
+                case expractiontype::emit:
+                    return action.expr_ir().expr();
+
+                case expractiontype::pop:
+                    this->pop_exprstate();
+
+                    if (stack_.empty()) {
+                        throw std::runtime_error(tostr("parser::include_token",
+                                                       ": pop leaves empty stack"));
+                    }
+
+                    action = this->top_exprstate().on_exprir(action.expr_ir());
+                    break;
+
+                case expractiontype::push1:
+                    this->push_exprstate(action.push_exs1());
+                    return nullptr;
+
+                case expractiontype::push2:
+                    this->push_exprstate(action.push_exs1());
+                    this->push_exprstate(action.push_exs2());
+                    return nullptr;
+
+                case expractiontype::invalid:
+                case expractiontype::n_expractiontype:
+                    /* unreachable */
+                    assert(false);
+                    return nullptr;
+                }
+            }
+        } /*include_token*/
+
+        void
+        parser::print(std::ostream & os) const {
+            /* NOTE(review): the streamed text appears to have been lost
+             * from this patch -- restore from the repository
+             */
+            os << "" << std::endl;
+        }
+    } /*namespace scm*/
+} /*namespace xo*/
+
+
+/* end parser.cpp */
diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt
new file mode 100644
index 00000000..d70b7d4d
--- /dev/null
+++ b/utest/CMakeLists.txt
@@ -0,0 +1,16 @@
+# xo-parser/utest/CMakeLists.txt
+
+set(UTEST_EXE utest.parser)
+set(UTEST_SRCS
+    parser_utest_main.cpp
+    parser.test.cpp)
+
+if (ENABLE_TESTING)
+    xo_add_utest_executable(${UTEST_EXE} ${UTEST_SRCS})
+    xo_self_dependency(${UTEST_EXE} xo_parser)
+    #xo_dependency(${UTEST_EXE} xo_ratio)
+    #xo_dependency(${UTEST_EXE} xo_reflectutil)
+    xo_external_target_dependency(${UTEST_EXE} Catch2 Catch2::Catch2)
+endif()
+
+# end CMakeLists.txt
diff --git a/utest/parser.test.cpp b/utest/parser.test.cpp
new file mode 100644
index 00000000..d5be7db7
--- /dev/null
+++ b/utest/parser.test.cpp
@@ -0,0 +1,199 @@
+/* file parser.test.cpp
+ *
+ * author: Roland Conybeare
+ */
+
+#include "xo/parser/parser.hpp"
+/* NOTE(review): header name was lost from this patch; Catch2 v2 path
+ * assumed (matches CATCH_CONFIG_MAIN in parser_utest_main.cpp) -- confirm
+ */
+#include <catch2/catch.hpp>
+#include <iostream>
+
+namespace xo {
+    using parser_type = xo::scm::parser;
+    using token_type = parser_type::token_type;
+    using xo::scm::exprstatetype;
+    using std::cerr;
+    using std::endl;
+
+    //using xo::ast::Expression;
+
+    namespace ut {
+        /* feeds
+         *   def foo : footype = 3.14159265
+         * one token at a time, checking the parser stack after each token
+         */
+        TEST_CASE("parser", "[parser]") {
+            parser_type parser;
+
+            parser.begin_translation_unit();
+
+            REQUIRE(parser.stack_size() == 1);
+            REQUIRE(parser.i_exstype(0)
+                    == exprstatetype::expect_toplevel_expression_sequence);
+
+            /* input:
+             *   def
+             */
+            {
+                auto r1 = parser.include_token(token_type::def());
+                REQUIRE(r1.get() == nullptr);
+
+                /* stack should be:
+                 *
+                 *   expect_toplevel_expression_sequence
+                 *   def_0
+                 *   expect_symbol
+                 */
+                CHECK(parser.stack_size() == 3);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0) == exprstatetype::expect_symbol);
+                if (parser.stack_size() > 1)
+                    CHECK(parser.i_exstype(1) == exprstatetype::def_0);
+                if (parser.stack_size() > 2)
+                    CHECK(parser.i_exstype(2)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+            }
+
+            /* input:
+             *   def foo
+             *   ^   ^
+             *   0   1
+             */
+            {
+                auto r2 = parser.include_token(token_type::symbol_token("foo"));
+
+                cerr << "parser state after [def foo]" << endl;
+                cerr << parser << endl;
+
+                REQUIRE(r2.get() == nullptr);
+
+                /* stack should be:
+                 *
+                 *   expect_toplevel_expression_sequence
+                 *   def_1
+                 */
+                CHECK(parser.stack_size() == 2);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0) == exprstatetype::def_1);
+                if (parser.stack_size() > 1)
+                    CHECK(parser.i_exstype(1)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+
+            }
+
+            /* input:
+             *   def foo :
+             *   ^   ^
+             *   0   1
+             */
+            {
+                auto r3 = parser.include_token(token_type::colon());
+
+                cerr << "parser state after [def foo :]" << endl;
+                cerr << parser << endl;
+
+                REQUIRE(r3.get() == nullptr);
+
+                /* stack should be:
+                 *
+                 *   expect_toplevel_expression_sequence
+                 *   def_2
+                 *   expect_symbol
+                 */
+                CHECK(parser.stack_size() == 3);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0) == exprstatetype::expect_symbol);
+                if (parser.stack_size() > 1)
+                    CHECK(parser.i_exstype(1) == exprstatetype::def_2);
+                if (parser.stack_size() > 2)
+                    CHECK(parser.i_exstype(2)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+            }
+
+            /* input:
+             *   def foo : footype
+             *   ^   ^
+             *   0   1
+             */
+            {
+                auto r4 = parser.include_token(token_type::symbol_token("footype"));
+
+                cerr << "parser state after [def foo : footype]" << endl;
+                cerr << parser << endl;
+
+                REQUIRE(r4.get() == nullptr);
+
+                /* stack should be:
+                 *
+                 *   expect_toplevel_expression_sequence
+                 *   def_3
+                 */
+                CHECK(parser.stack_size() == 2);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0) == exprstatetype::def_3);
+                if (parser.stack_size() > 1)
+                    CHECK(parser.i_exstype(1)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+
+                /* expecting either:
+                 *   = rhs-expression
+                 *   new-expression
+                 */
+            }
+
+            /* input:
+             *   def foo : footype =
+             *   ^   ^
+             *   0   1
+             */
+            {
+                auto r5 = parser.include_token(token_type::singleassign());
+
+                cerr << "parser state after [def foo : footype =]" << endl;
+                cerr << parser << endl;
+
+                REQUIRE(r5.get() == nullptr);
+
+                /* stack should be
+                 *
+                 *   expect_toplevel_expression_sequence
+                 *   def_4
+                 *   expect_rhs_expression
+                 */
+                CHECK(parser.stack_size() == 3);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0) == exprstatetype::expect_rhs_expression);
+                if (parser.stack_size() > 1)
+                    CHECK(parser.i_exstype(1) == exprstatetype::def_4);
+                if (parser.stack_size() > 2)
+                    CHECK(parser.i_exstype(2)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+            }
+
+            /* input:
+             *   def foo : footype = 3.14159265
+             *   ^   ^
+             *   0   1
+             */
+            {
+                auto r6 = parser.include_token(token_type::f64_token("3.14159265"));
+
+                cerr << "parser state after [def foo : footype = 3.14159265]" << endl;
+                cerr << parser << endl;
+
+                /* the literal completes the definition: expression is emitted */
+                REQUIRE(r6.get() != nullptr);
+
+                /* stack should be
+                 *
+                 *   expect_toplevel_expression_sequence
+                 */
+                CHECK(parser.stack_size() == 1);
+                if (parser.stack_size() > 0)
+                    CHECK(parser.i_exstype(0)
+                          == exprstatetype::expect_toplevel_expression_sequence);
+            }
+        } /*TEST_CASE(parser)*/
+    } /*namespace ut*/
+} /*namespace xo*/
+
+/* end parser.test.cpp */
diff --git a/utest/parser_utest_main.cpp b/utest/parser_utest_main.cpp
new file mode 100644
index 00000000..d1013151
--- /dev/null
+++ b/utest/parser_utest_main.cpp
@@ -0,0 +1,6 @@
+/* file parser_utest_main.cpp */
+
+#define CATCH_CONFIG_MAIN
+/* NOTE(review): header name was lost from this patch; Catch2 v2 path assumed -- confirm */
+#include <catch2/catch.hpp>
+
+/* end parser_utest_main.cpp */