parser: initial implementation [wip - only handles 'def' expr]

This commit is contained in:
Roland Conybeare 2024-07-31 23:37:51 +10:00
commit 5d2ee35fe6
10 changed files with 1454 additions and 0 deletions

8
.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# emacs workspace config
.projectile
# clangd working space (see emacs+lsp)
.cache
# typical cmake build directory (source-tree-nephew)
.build*
# symlink to builddir/compile_commands.json; should be set manually in dev sandbox
compile_commands.json

27
CMakeLists.txt Normal file
View file

@ -0,0 +1,27 @@
# xo-parser/CMakeLists.txt
#
# Top-level build script for the xo_parser project:
# bootstraps the xo cmake macros, then builds the library (src/parser)
# and the unit tests (utest), and finally exports a cmake package config.
cmake_minimum_required(VERSION 3.10)
project(xo_parser VERSION 0.1)
include(GNUInstallDirs)
# bootstrap script; the xo_* commands used below are expected to come from here
include(cmake/xo-bootstrap-macros.cmake)
xo_cxx_toplevel_options3()
# ----------------------------------------------------------------
# c++ settings
# PROJECT_CXX_FLAGS is empty by default; the commented-out line shows
# an example of a project-specific compiler flag.
set(PROJECT_CXX_FLAGS "")
#set(PROJECT_CXX_FLAGS "-fconcepts-diagnostics-depth=2")
# NOTE(review): add_definitions() is aimed at -D preprocessor definitions;
# add_compile_options() may be the better fit for general compiler flags.
add_definitions(${PROJECT_CXX_FLAGS})
# ----------------------------------------------------------------
# library first, then unit tests
add_subdirectory(src/parser)
add_subdirectory(utest)
# ----------------------------------------------------------------
# provide find_package() support
xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)

View file

@ -0,0 +1,35 @@
# ----------------------------------------------------------------
# for example:
# $ PREFIX=/usr/local # for example
# $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
#
# will get
# CMAKE_MODULE_PATH
# from xo-cmake-config --cmake-module-path
#
# and expect .cmake macros in
# CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
# ----------------------------------------------------------------
# Locate the xo-cmake-config helper; it reports where the xo cmake macros live.
find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)
# On failure find_program() stores "<VAR>-NOTFOUND", which if() treats as false.
# Explicit check kept as a fallback for cmake versions where the REQUIRED
# option is not honoured.
if (NOT XO_CMAKE_CONFIG_EXECUTABLE)
    # FATAL_ERROR (not FATAL) is the message mode that stops processing
    message(FATAL_ERROR "could not find xo-cmake-config executable")
endif()
message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}")

if (NOT XO_SUBMODULE_BUILD)
    # CMAKE_MODULE_PATH unset, or left at the literal placeholder 'prefix':
    # ask xo-cmake-config for the module path.
    if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix))
        # default to typical install location for xo-project-macros
        # (strip the trailing newline so the path is usable for include())
        execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path
                        OUTPUT_VARIABLE CMAKE_MODULE_PATH
                        OUTPUT_STRIP_TRAILING_WHITESPACE)
        message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
    endif()
endif()

# needs to have been installed somewhere on CMAKE_MODULE_PATH,
# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
#
include(xo_macros/xo_cxx)
xo_cxx_bootstrap_message()

View file

@ -0,0 +1,8 @@
# Package config template: placeholders of the form @NAME@ are substituted
# when the config file is generated.
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
# packages that a consumer of this package also needs to find
find_dependency(xo_expression)
find_dependency(xo_tokenizer)
#find_dependency(subsys)
# imported targets exported by this project
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
check_required_components("@PROJECT_NAME@")

View file

@ -0,0 +1,454 @@
/* file parser.hpp
*
* author: Roland Conybeare, Jul 2024
*/
#pragma once
#include "xo/expression/Expression.hpp"
#include "xo/tokenizer/token.hpp"
#include <cstddef>
#include <ostream>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
namespace xo {
namespace scm {
// ----- exprir -----
/** type code for an exprir (intermediate representation) value **/
enum class exprirtype {
/** sentinel: not a valid IR type **/
invalid = -1,
/** IR carrying neither symbol nor expression -- TODO confirm intended use **/
empty,
/** IR carries a symbol (type or variable) name **/
symbol,
/** IR carries a completed expression **/
expression,
/** not an IR type: comes last, counts the enumerators that follow invalid **/
n_exprirtype
};
/** @return human-readable name for @p x (defined in parser.cpp) **/
extern const char * exprirtype_descr(exprirtype x);

/** write the name of @p x, as reported by exprirtype_descr(), on @p os **/
inline std::ostream &
operator<< (std::ostream & os, exprirtype x)
{
    return os << exprirtype_descr(x);
}
/** intermediate representation for some part of an expression.
* Carries a type code plus either a symbol name or a completed expression.
*
* Examples:
* 1. a variable name (but without type information)
**/
class exprir {
public:
using Expression = xo::ast::Expression;
public:
/** create IR with type code exprirtype::invalid, empty name, default expression **/
exprir() = default;
/** create IR with type code @p xir_type carrying symbol name @p x **/
exprir(exprirtype xir_type,
const std::string & x)
: xir_type_{xir_type}, symbol_name_{x} {}
/** create IR with type code @p xir_type carrying expression @p expr **/
exprir(exprirtype xir_type,
rp<Expression> expr)
: xir_type_{xir_type}, expr_{std::move(expr)} {}
exprirtype xir_type() const { return xir_type_; }
const std::string & symbol_name() const { return symbol_name_; }
const rp<Expression> & expr() const { return expr_; }
/** print human-readable representation on @p os **/
void print(std::ostream & os) const;
private:
/** IR type code **/
exprirtype xir_type_ = exprirtype::invalid;
/** xir_type=symbol: a symbol (type or variable) name **/
std::string symbol_name_;
/** xir_type=expression: a completed expression **/
rp<Expression> expr_;
};
/** stream inserter for exprir: delegates to exprir::print() **/
inline std::ostream &
operator<< (std::ostream & os, const exprir & x) {
x.print(os);
return os;
}
/** type code for an exprstate (one entry in the parser stack).
*
* The def_N states track progress through a definition such as
* def foo : f64 = 1
**/
enum class exprstatetype {
/** sentinel: not a valid state **/
invalid = -1,
/** toplevel of some translation unit **/
expect_toplevel_expression_sequence,
/** got 'def' keyword, symbol to follow **/
def_0,
/** got symbol name **/
def_1,
/** got (optional) colon, type name to follow **/
def_2,
/** got symbol type **/
def_3,
/** got (optional) equal sign, value to follow **/
def_4,
/** expecting an expression on the right-hand side of a definition **/
expect_rhs_expression,
/** expecting a symbol **/
expect_symbol,
/** not a state: comes last, counts the enumerators that follow invalid **/
n_exprstatetype
};
/** @return human-readable name for @p x (defined in parser.cpp) **/
extern const char * exprstatetype_descr(exprstatetype x);

/** write the name of @p x, as reported by exprstatetype_descr(), on @p os **/
inline std::ostream &
operator<< (std::ostream & os, exprstatetype x)
{
    return os << exprstatetype_descr(x);
}
/** type code for an expraction: tells the parser what to do with its stack **/
enum class expractiontype {
/** sentinel: not a valid action **/
invalid = -1,
/** push one new exprstate **/
push1,
/** push two new exprstates **/
push2,
/** keep current exprstate (which will have been updated in place) **/
keep,
/** parser returns the expression carried by the action to its caller **/
emit,
/** drop current exprstate, report exprir to parent **/
pop,
/** not an action: comes last, counts the enumerators that follow invalid **/
n_expractiontype
};
/** @return human-readable name for @p x (defined in parser.cpp) **/
extern const char * expractiontype_descr(expractiontype x);

/** write the name of @p x, as reported by expractiontype_descr(), on @p os **/
inline std::ostream &
operator<< (std::ostream & os, expractiontype x)
{
    return os << expractiontype_descr(x);
}
/** an action associated with parser response to an incoming lexical token
* (or to intermediate representation reported by a nested parsing state).
* Produced by exprstate, carried out by parser.
**/
class expraction {
public:
/** create action with type expractiontype::invalid **/
expraction() = default;
/** create action from explicit values for all members.
*
* @param action_type what the parser should do
* @param expr_ir intermediate representation carried by this action
* @param push_exs1 first state to push (action types push1, push2)
* @param push_exs2 second state to push (action type push2)
**/
expraction(expractiontype action_type,
const exprir & expr_ir,
exprstatetype push_exs1,
exprstatetype push_exs2)
: action_type_{action_type}, expr_ir_{expr_ir},
push_exs1_{push_exs1}, push_exs2_{push_exs2}
{}
/** create action with type keep **/
static expraction keep();
/** create action with type emit, carrying @p ir **/
static expraction emit(const exprir & ir);
/** create action with type push2: push @p s1, followed by @p s2 **/
static expraction push2(exprstatetype s1, exprstatetype s2);
expractiontype action_type() const { return action_type_; }
const exprir & expr_ir() const { return expr_ir_; }
exprstatetype push_exs1() const { return push_exs1_; }
exprstatetype push_exs2() const { return push_exs2_; }
/** print human-readable representation on @p os **/
void print(std::ostream & os) const;
private:
/**
* push1: push new exprstate built from push_exs1_
* push2: push new exprstate built from push_exs1_,
* followed by push_exs2_
* keep: keep current exprstate (which will have updated inplace)
* emit: parser returns expression held in expr_ir_ to its caller
* pop: drop exprstate, report exprir to parent
**/
expractiontype action_type_ = expractiontype::invalid;
/**
* intermediate representation (pass to enclosing stack state)
**/
exprir expr_ir_;
/** with action_type push1 or push2,
* parser will push exprstate with this type
**/
exprstatetype push_exs1_ = exprstatetype::invalid;
/** with action_type push2,
* parser will push exprstate with this type
* (after pushing exprstate built from push_exs1_)
**/
exprstatetype push_exs2_ = exprstatetype::invalid;
};
/** stream inserter for expraction: delegates to expraction::print() **/
inline std::ostream &
operator<< (std::ostream & os,
const expraction & x)
{
x.print(os);
return os;
}
/** state associated with a partially-parsed expression.
 *
 *  An exprstate reacts to incoming tokens (on_input) and to intermediate
 *  representation reported by a nested parsing task (on_exprir); in both
 *  cases it answers with an expraction for the parser to carry out.
 **/
class exprstate {
public:
    using exprtype = xo::ast::exprtype;
    using token_type = token<char>;

public:
    /** create state with type exprstatetype::invalid **/
    exprstate() = default;
    /** create state with type @p exs_type.
     *  Intentionally not explicit: an exprstatetype converts
     *  implicitly to an exprstate.
     **/
    exprstate(exprstatetype exs_type) : exs_type_{exs_type} {}

    static exprstate expect_toplevel_expression_sequence() {
        return exprstate(exprstatetype::expect_toplevel_expression_sequence);
    }
    static exprstate def_0() {
        return exprstate(exprstatetype::def_0);
    }
    static exprstate expect_symbol() {
        return exprstate(exprstatetype::expect_symbol);
    }

    exprstatetype exs_type() const { return exs_type_; }

    /** true iff this parsing state admits a 'def' keyword
     *  as next token
     **/
    bool admits_definition() const;
    /** true iff this parsing state admits a symbol as next token **/
    bool admits_symbol() const;
    /** true iff this parsing state admits a colon as next token **/
    bool admits_colon() const;
    /** true iff this parsing state admits a singleassign '=' as next token **/
    bool admits_singleassign() const;
    /** true iff this parsing state admits a 64-bit floating point literal token **/
    bool admits_f64() const;

    /** update exprstate in response to incoming token @p tk,
     *  forward instructions to parent parser
     **/
    expraction on_input(const token_type & tk);
    /** update exprstate in response to IR (intermediate representation)
     *  from nested parsing task
     **/
    expraction on_exprir(const exprir & ir);

    /** print human-readable representation on @p os **/
    void print(std::ostream & os) const;

private:
    /** handlers for individual token types; invoked from on_input() **/
    expraction on_def();
    expraction on_symbol(const token_type & tk);
    expraction on_colon();
    expraction on_singleassign();
    expraction on_f64(const token_type & tk);

private:
    /**
     *   def foo : f64 = 1
     *  ^   ^   ^ ^   ^ ^ ^
     *  |   |   | |   | | (done)
     *  |   |   | |   | def_4:expect_rhs_expression
     *  |   |   | |   def_3
     *  |   |   | def_2:expect_symbol
     *  |   |   def_1
     *  |   def_0:expect_symbol
     *  expect_toplevel_expression_sequence
     *
     *   def_0:expect_symbol: got 'def' keyword, symbol to follow
     *   def_1: got symbol name
     *   def_2:expect_symbol got (optional) colon, type name to follow
     *   def_3: got symbol type
     *   def_4:expect_rhs_expression got (optional) equal sign, value to follow
     *   (done): definition complete, pop exprstate from stack
     *
     *  Initialized to invalid so that a default-constructed exprstate
     *  never holds an indeterminate type code.
     **/
    exprstatetype exs_type_ = exprstatetype::invalid;
    /** e.g. foo in
     *    def foo : f64 = 1
     **/
    std::string def_lhs_symbol_;
    /** e.g. f64 in
     *    def foo : f64 = 1
     **/
    std::string def_lhs_type_;
}; /*exprstate*/
/** stream inserter for exprstate: delegates to exprstate::print() **/
inline std::ostream &
operator<< (std::ostream & os, const exprstate & x) {
x.print(os);
return os;
}
/** schematica parser
*
* Consumes tokens one at a time (see include_token()), tracking
* partially-parsed expressions on a stack of exprstate entries.
*
* Examples:
*
* decltype point
*
* // forward declarations
* decl pi : f64
* decl fib(n : i32) -> i32
*
* def pi = 3.14159265 // constant. = is single assignment
*
* def fib(n : i32) -> i32 {
* // nested defs ok
* def aux(n : i32, s1 : i32, s2 : i32) -> i32 {
* // or:
* // (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
* //
* if (n == 0) {
* s1
* } else {
* aux(n - 1, s1 + s2, s1)
* }
*
* // or:
* // if (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
* }
*
* aux(n=n, s1=1, s2=0)
* }
*
* def anotherfib = lambda(n : i32) { fib(n) }
*
* def any : object
* def l : list<object> = '()
*
* deftype point :: {x : f64, y : f64}
* deftype polar :: {arg : f64, mag : f64}
*
* def polar2rect(pt : polar) -> point {
* point(x = pt.mag * cos(arg),
* y = pt.mag * sin(arg))
* }
*
* Grammar:
* toplevel-program = expression*
* type-decl = decltype $typename [<$tp1 .. $tpn>]
* expression = define-expr
* | literal-expr
* | variable-expr
* | apply-expr
* | if-expr
* | lambda-expr
* | block
*
* define-expr = type-decl
* | type-def
* | variable-def
* | function-decl
* | function-def
*
* type-def = deftype $typename [<$tp1 .. $tpn>] :: type-def-rhs
* type-def-rhs = object
* | bool
* | i128 | i64 | i32 | i16 | i8
* | f128 | f64 | f32 | f16
* | struct $typename { ($membername(i) : $typename(i))* }
* [end $typename]
* | tuple $typename { $typename(1), .., $typename(n) }
* [end $typename]
* | copytype $typename
* | subtype $typename { ($member(i) : $typename(i))* }
*
* variable-def = decl $varname [: $typename] [= expression]
* function-decl = decl $functionname($varname(1) : $typename(1),
* ..,
* $varname(n) : $typename(n)) -> $typename[ret]
* function-def = def $functionname($varname(1) : $typename(1),
* ..,
* $varname(n) : $typename(n)) [-> $typename[ret]]
* body-expr
* [ end $functionname ]
* literal-expr = integer-literal
* | fp-literal
* | string-literal
* | symbol-literal
* | struct-literal
*
* variable-expr = $varname
* apply-expr = fn-expr(arg-expr(1), .., arg-expr(n))
* fn-expr = expression
* arg-expr(i) = expression
*
* if-expr = if (test-expr) then-block else else-block
* | (test-expr) ? then-expr : else-expr
* test-expr = expression
* then-block = block
* else-block = block
*
* block = { (definition | expression)* }
*
* lambda-expr = lambda ($paramname(1) : $type(1),
* ..,
* $paramname(n) : $type(n)) body-expr
* body-expr = expression
**/
class parser {
public:
using Expression = xo::ast::Expression;
using token_type = exprstate::token_type; // token<char>;
public:
/** create parser with empty stack;
* call @ref begin_translation_unit before sending tokens
* via @ref include_token
**/
parser() = default;
/** for diagnostics: number of entries in parser stack **/
std::size_t stack_size() const { return stack_.size(); }
/** for diagnostics: exprstatetype at level @p i
* (taken relative to top of stack, so i=0 is the top)
*
* @pre 0 <= i < stack_size
* @return state type at level @p i;
* exprstatetype::invalid if @p i is out of bounds
**/
exprstatetype i_exstype(std::size_t i) const {
std::size_t z = stack_.size();
if (i < z) {
return stack_[(z - 1) - i].exs_type();
}
/* out of bounds */
return exprstatetype::invalid;
}
/** put parser into state for beginning of a translation unit
* (i.e. input stream)
**/
void begin_translation_unit();
/** include next token @p tk and increment parser state.
*
* @param tk next input token
* @return parsed expression, if @p tk completes an expression.
* otherwise nullptr
**/
rp<Expression> include_token(const token_type & tk);
/** print human-readable representation on stream @p os **/
void print(std::ostream & os) const;
private:
/** state at top of stack **/
exprstate & top_exprstate();
/** push @p exs onto top of stack **/
void push_exprstate(const exprstate & exs);
/** remove state at top of stack **/
void pop_exprstate();
private:
/** stack recording state associated with enclosing expressions.
*
* Note: at least as of c++23, the std::stack api doesn't support access
* to members other than the top; using std::vector instead.
*
* for stack with N elements (N = stack_.size()):
* - bottom of stack is stack_[0]
* - top of stack is stack_[N-1]
**/
std::vector<exprstate> stack_;
}; /*parser*/
/** stream inserter for parser: delegates to parser::print() **/
inline std::ostream &
operator<< (std::ostream & os,
const parser & x) {
x.print(os);
return os;
}
} /*namespace scm*/
} /*namespace xo*/
/* end parser.hpp */

11
src/parser/CMakeLists.txt Normal file
View file

@ -0,0 +1,11 @@
# parser/CMakeLists.txt
#
# Declares the xo_parser library and the xo packages it depends on.
set(SELF_LIB xo_parser)
# source files that make up the library
set(SELF_SRCS
parser.cpp)
# NOTE(review): xo_add_shared_library4 comes from the xo cmake macros;
# meaning of the literal '1' argument is not visible here -- confirm against macro definition
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
xo_dependency(${SELF_LIB} xo_expression)
xo_dependency(${SELF_LIB} xo_tokenizer)
# end CMakeLists.txt

690
src/parser/parser.cpp Normal file
View file

@ -0,0 +1,690 @@
/* file parser.cpp
*
* author: Roland Conybeare
*/
#include "parser.hpp"
#include "xo/expression/DefineExpr.hpp"
#include "xo/expression/Constant.hpp"
#include <regex>
#include <stdexcept>
namespace xo {
using xo::ast::Expression;
using xo::ast::DefineExpr;
using xo::ast::Constant;
namespace scm {
/** @return human-readable name for @p x;
 *  "???exprirtype" when @p x has no name of its own
 **/
const char *
exprirtype_descr(exprirtype x) {
    const char * descr = "???exprirtype";

    switch (x) {
    case exprirtype::invalid:
        descr = "?invalid";
        break;
    case exprirtype::empty:
        descr = "empty";
        break;
    case exprirtype::symbol:
        descr = "symbol";
        break;
    case exprirtype::expression:
        descr = "expression";
        break;
    case exprirtype::n_exprirtype:
        /* counter, not a proper IR type: keep fallback */
        break;
    }

    return descr;
}
/** print human-readable representation on @p os:
 *  type code, symbol name and expression, in that order
 **/
void
exprir::print(std::ostream & os) const {
    os << "<exprir";
    os << xtag("type", xir_type_);
    os << xtag("symbol_name", symbol_name_);
    os << xtag("expr", expr_);
    os << ">";
}
/** @return human-readable name for @p x;
 *  "???" when @p x has no name of its own
 **/
const char *
exprstatetype_descr(exprstatetype x) {
    const char * descr = "???";

    switch (x) {
    case exprstatetype::invalid:
        descr = "?invalid";
        break;
    case exprstatetype::expect_toplevel_expression_sequence:
        descr = "expect_toplevel_expression_sequence";
        break;
    case exprstatetype::def_0:
        descr = "def_0";
        break;
    case exprstatetype::def_1:
        descr = "def_1";
        break;
    case exprstatetype::def_2:
        descr = "def_2";
        break;
    case exprstatetype::def_3:
        descr = "def_3";
        break;
    case exprstatetype::def_4:
        descr = "def_4";
        break;
    case exprstatetype::expect_rhs_expression:
        descr = "expect_rhs_expression";
        break;
    case exprstatetype::expect_symbol:
        descr = "expect_symbol";
        break;
    case exprstatetype::n_exprstatetype:
        /* counter, not a proper state: keep fallback */
        break;
    }

    return descr;
}
/** @return human-readable name for @p x;
 *  "???" when @p x has no name of its own
 **/
const char *
expractiontype_descr(expractiontype x) {
    const char * descr = "???";

    switch (x) {
    case expractiontype::invalid:
        descr = "?invalid";
        break;
    case expractiontype::push1:
        descr = "push1";
        break;
    case expractiontype::push2:
        descr = "push2";
        break;
    case expractiontype::keep:
        descr = "keep";
        break;
    case expractiontype::emit:
        descr = "emit";
        break;
    case expractiontype::pop:
        descr = "pop";
        break;
    case expractiontype::n_expractiontype:
        /* counter, not a proper action: keep fallback */
        break;
    }

    return descr;
}
/** @return action telling parser to keep the current exprstate **/
expraction
expraction::keep() {
    /* a keep action consults neither push slot */
    constexpr exprstatetype c_not_used = exprstatetype::invalid;

    return expraction(expractiontype::keep, exprir(), c_not_used, c_not_used);
}
/** @return action with type emit, carrying @p ir **/
expraction
expraction::emit(const exprir & ir) {
    /* an emit action consults neither push slot */
    constexpr exprstatetype c_not_used = exprstatetype::invalid;

    return expraction(expractiontype::emit, ir, c_not_used, c_not_used);
}
/** @return action telling parser to push state @p s1, followed by @p s2.
 *  The action carries a default-constructed exprir.
 **/
expraction
expraction::push2(exprstatetype s1, exprstatetype s2) {
    const exprir no_ir;

    return expraction(expractiontype::push2, no_ir, s1, s2);
}
/** print human-readable representation on @p os:
 *  action type, carried IR, then both push slots
 **/
void
expraction::print(std::ostream & os) const {
    os << "<expraction"
       << xtag("type", action_type_)
       << xtag("ir", expr_ir_)
       << xtag("push_exs1", push_exs1_)
       << xtag("push_exs2", push_exs2_)
       << ">";
}
/** @return true iff this parsing state admits a 'def' keyword as next token.
 *  Only the toplevel expression sequence does.
 **/
bool
exprstate::admits_definition() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
        return true;

    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
        /* note for def_4:
         *   rhs could certainly be a function body that contains
         *   nested defines; but then immediately-enclosing-exprstate
         *   would be a block
         */
        return false;

    case exprstatetype::expect_rhs_expression:
        return false;

    case exprstatetype::expect_symbol:
        return false;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        return false;
    }

    /* exs_type_ holds a value outside the enumerators:
     * answer explicitly instead of flowing off the end of the function
     */
    return false;
}
/** @return true iff this parsing state admits a symbol as next token **/
bool
exprstate::admits_symbol() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
        return false;

    case exprstatetype::expect_rhs_expression:
        /* treat symbol as variable name */
        return true;

    case exprstatetype::expect_symbol:
        return true;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        return false;
    }

    /* exs_type_ holds a value outside the enumerators:
     * answer explicitly instead of flowing off the end of the function
     */
    return false;
}
/** @return true iff this parsing state admits a colon as next token.
 *  Only def_1 (symbol name seen, type annotation may follow) does.
 **/
bool
exprstate::admits_colon() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
    case exprstatetype::def_0:
        return false;

    case exprstatetype::def_1:
        return true;

    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
    case exprstatetype::expect_rhs_expression:
        /* rhs-expressions (or expressions for that matter)
         * may not begin with a colon
         */
    case exprstatetype::expect_symbol:
        return false;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        return false;
    }

    /* exs_type_ holds a value outside the enumerators:
     * answer explicitly instead of flowing off the end of the function
     */
    return false;
}
/** @return true iff this parsing state admits a singleassign '=' as next token.
 *  Only def_3 (symbol type seen) does.
 **/
bool
exprstate::admits_singleassign() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
        return false;

    case exprstatetype::def_3:
        return true;

    case exprstatetype::def_4:
    case exprstatetype::expect_rhs_expression:
        /* rhs-expressions (or expressions for that matter)
         * may not begin with singleassign '='
         */
    case exprstatetype::expect_symbol:
        return false;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        return false;
    }

    /* exs_type_ holds a value outside the enumerators:
     * answer explicitly instead of flowing off the end of the function
     */
    return false;
}
/** @return true iff this parsing state admits a 64-bit floating point
 *  literal as next token. Only expect_rhs_expression does.
 **/
bool
exprstate::admits_f64() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
        return false;

    case exprstatetype::expect_rhs_expression:
        return true;

    case exprstatetype::expect_symbol:
        return false;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        return false;
    }

    /* exs_type_ holds a value outside the enumerators:
     * answer explicitly instead of flowing off the end of the function
     */
    return false;
}
/** respond to keyword 'def'.
 *
 *  @return push2 action: def_0, followed by expect_symbol
 *  @throws std::runtime_error if this state does not admit a definition
 **/
expraction
exprstate::on_def() {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_def";

    if (this->admits_definition()) {
        /* keyword 'def' introduces a definition:
         *   def pi : f64 = 3.14159265
         *   def sq(x : f64) -> f64 { (x * x) }
         *
         * todo: replace expect_symbol with
         *   expect_symbol_or_function_signature()
         */
        return expraction::push2(exprstatetype::def_0,
                                 exprstatetype::expect_symbol);
    }

    /* lots of illegal states */
    throw std::runtime_error(tostr(self_name,
                                   ": unexpected keyword 'def' for parsing state",
                                   xtag("state", *this)));
}
/** respond to symbol token @p tk.
 *
 *  @return pop action carrying a symbol IR built from the text of @p tk
 *  @throws std::runtime_error if this state does not admit a symbol
 **/
expraction
exprstate::on_symbol(const token_type & tk) {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_symbol";

    if (!this->admits_symbol()) {
        throw std::runtime_error
            (tostr(self_name,
                   ": unexpected symbol-token for parsing state",
                   xtag("symbol", tk),
                   xtag("state", *this)));
    }

    switch (this->exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
        /* defensive: admits_symbol() already rejects this state */
        throw std::runtime_error
            (tostr(self_name,
                   ": unexpected symbol-token at top-level",
                   " (expecting decl|def)",
                   xtag("symbol", tk)));

    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
        /* unreachable */
        assert(false);
        return expraction();

    case exprstatetype::expect_rhs_expression:
    case exprstatetype::expect_symbol:
        /* report symbol to enclosing state */
        return expraction(expractiontype::pop,
                          exprir(exprirtype::symbol, tk.text()),
                          exprstatetype::invalid /*not used*/,
                          exprstatetype::invalid /*not used*/);

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        assert(false);
        return expraction();
    }

    /* exs_type_ holds a value outside the enumerators:
     * return explicitly instead of flowing off the end of the function
     */
    assert(false);
    return expraction();
}
/** respond to a colon token.
 *
 *  In state def_1: advance to def_2 and ask parser to push expect_symbol.
 *
 *  @throws std::runtime_error if this state does not admit a colon
 **/
expraction
exprstate::on_colon() {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_colon";

    /* lots of illegal states */
    if (!this->admits_colon()) {
        throw std::runtime_error(tostr(self_name,
                                       ": unexpected colon for parsing state",
                                       xtag("state", *this)));
    }

    if (this->exs_type_ != exprstatetype::def_1) {
        assert(false);
        return expraction();
    }

    this->exs_type_ = exprstatetype::def_2;

    return expraction(expractiontype::push1,
                      exprir(),
                      exprstatetype::expect_symbol,
                      exprstatetype::invalid /*not used*/);
}
/** respond to a singleassign '=' token.
 *
 *  In state def_3: advance to def_4 and ask parser to push
 *  expect_rhs_expression.
 *
 *  @throws std::runtime_error if this state does not admit singleassign
 **/
expraction
exprstate::on_singleassign() {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_singleassign";

    if (!this->admits_singleassign()) {
        throw std::runtime_error(tostr(self_name,
                                       ": unexpected equals for parsing state",
                                       xtag("state", *this)));
    }

    if (this->exs_type_ != exprstatetype::def_3) {
        assert(false);
        return expraction();
    }

    this->exs_type_ = exprstatetype::def_4;

    return expraction(expractiontype::push1,
                      exprir(),
                      exprstatetype::expect_rhs_expression,
                      exprstatetype::invalid /*not used*/);
}
/** respond to a 64-bit floating point literal token @p tk.
 *
 *  In state expect_rhs_expression: wrap the literal in a Constant<double>
 *  and report it to the enclosing state via a pop action.
 *
 *  @throws std::runtime_error if this state does not admit an f64 literal
 **/
expraction
exprstate::on_f64(const token_type & tk) {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    constexpr const char * self_name = "exprstate::on_f64";

    if (!this->admits_f64()) {
        throw std::runtime_error(tostr(self_name,
                                       ": unexpected floating-point literal for parsing state",
                                       xtag("state", *this)));
    }

    if (this->exs_type_ != exprstatetype::expect_rhs_expression) {
        assert(false);
        return expraction();
    }

    rp<Expression> literal = Constant<double>::make(tk.f64_value());

    return expraction(expractiontype::pop,
                      exprir(exprirtype::expression, literal),
                      exprstatetype::invalid /*not used*/,
                      exprstatetype::invalid /*not used*/);
}
/** update exprstate in response to incoming token @p tk.
 *
 *  Dispatches on token type. Only def, f64, symbol, colon and singleassign
 *  tokens have handlers; every other token type trips an assert and yields
 *  a default-constructed (invalid) action.
 **/
expraction
exprstate::on_input(const token_type & tk) {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    log && log(xtag("tk", tk));
    log && log(xtag("state", *this));

    switch (tk.tk_type()) {
    /* token types with a dedicated handler */
    case tokentype::tk_def:
        return this->on_def();
    case tokentype::tk_f64:
        return this->on_f64(tk);
    case tokentype::tk_symbol:
        return this->on_symbol(tk);
    case tokentype::tk_colon:
        return this->on_colon();
    case tokentype::tk_singleassign:
        return this->on_singleassign();

    /* token types not handled yet */
    case tokentype::tk_i64:
    case tokentype::tk_string:
    case tokentype::tk_leftparen:
    case tokentype::tk_rightparen:
    case tokentype::tk_leftbracket:
    case tokentype::tk_rightbracket:
    case tokentype::tk_leftbrace:
    case tokentype::tk_rightbrace:
    case tokentype::tk_leftangle:
    case tokentype::tk_rightangle:
    case tokentype::tk_dot:
    case tokentype::tk_comma:
    case tokentype::tk_doublecolon:
    case tokentype::tk_semicolon:
    case tokentype::tk_assign:
    case tokentype::tk_yields:
    case tokentype::tk_type:
    case tokentype::tk_lambda:
    case tokentype::tk_if:
    case tokentype::tk_let:
    case tokentype::tk_in:
    case tokentype::tk_end:
    /* not proper token types */
    case tokentype::tk_invalid:
    case tokentype::n_tokentype:
        break;
    }

    assert(false);
    return expraction();
}
/** update exprstate in response to IR (intermediate representation) @p ir
 *  reported by a nested parsing state.
 *
 *  - toplevel: an expression IR is emitted, i.e. handed to the caller
 *    of parser::include_token()
 *  - def_0: record @p ir as the symbol being defined, advance to def_1
 *  - def_2: record @p ir as the symbol's type name, advance to def_3
 *  - def_4: an expression IR completes the definition; report a
 *    DefineExpr to the enclosing state via a pop action
 *
 *  Combinations not implemented yet trip an assert and yield a
 *  default-constructed (invalid) action.
 **/
expraction
exprstate::on_exprir(const exprir & ir) {
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    log && log(xtag("ir", ir));
    log && log(xtag("state", *this));

    switch (this->exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
        /* toplevel expression sequence accepts an
         * arbitrary number of expressions.
         *
         * parser::include_token() returns the emitted expression
         * to its caller
         */
        if (ir.xir_type() == exprirtype::expression)
            return expraction::emit(ir);

        /* NOT IMPLEMENTED */
        assert(false);
        return expraction();

    case exprstatetype::def_0:
        this->exs_type_ = exprstatetype::def_1;
        this->def_lhs_symbol_ = ir.symbol_name();
        return expraction::keep();

    case exprstatetype::def_1:
        /* NOT IMPLEMENTED */
        assert(false);
        return expraction();

    case exprstatetype::def_2:
        this->exs_type_ = exprstatetype::def_3;
        this->def_lhs_type_ = ir.symbol_name();
        return expraction::keep();

    case exprstatetype::def_3:
        /* NOT IMPLEMENTED */
        assert(false);
        return expraction();

    case exprstatetype::def_4:
        /* have all the ingredients to create an expression
         * representing a definition
         *
         * 1. if ir_type is a symbol, interpret as variable name.
         *    Need to be able to locate variable by type
         * 2. if ir_type is an expression, adopt as rhs
         */
        if (ir.xir_type() == exprirtype::expression) {
            /* TODO: do something with def_lhs_type */
            rp<Expression> rhs_value = ir.expr();
            rp<Expression> def
                = DefineExpr::make(this->def_lhs_symbol_,
                                   rhs_value);

            return expraction(expractiontype::pop,
                              exprir(exprirtype::expression, def),
                              exprstatetype::invalid /*not used*/,
                              exprstatetype::invalid /*not used*/);
        } else {
            /* NOT IMPLEMENTED (case 1. above) */
            assert(false);
            return expraction();
        }

    case exprstatetype::expect_rhs_expression:
    case exprstatetype::expect_symbol:
        /* unreachable
         * (this exprstate issues pop instruction from exprstate::on_input())
         */
        assert(false);
        return expraction();

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        assert(false);
        return expraction();
    }

    /* exs_type_ holds a value outside the enumerators:
     * return explicitly instead of flowing off the end of the function
     */
    assert(false);
    return expraction();
}
/** print human-readable representation on @p os:
 *  state type, then lhs symbol and lhs type of a definition in progress
 **/
void
exprstate::print(std::ostream & os) const {
    os << "<exprstate";
    os << xtag("type", exs_type_);
    os << xtag("def_lhs_symbol", def_lhs_symbol_);
    os << xtag("def_lhs_type", def_lhs_type_);
    os << ">";
}
// ----- parser -----
/** @return state at top of parser stack
 *  @throws std::runtime_error if stack is empty
 **/
exprstate &
parser::top_exprstate() {
    if (stack_.empty()) {
        throw std::runtime_error
            ("parser::top_exprstate: unexpected empty stack");
    }

    return stack_.back();
}
/** push a copy of @p exs onto top of parser stack **/
void
parser::push_exprstate(const exprstate & exs) {
    stack_.push_back(exs);
}
/** remove state at top of parser stack; no-op if stack is empty **/
void
parser::pop_exprstate() {
    if (stack_.empty())
        return;

    stack_.pop_back();
}
void
parser::begin_translation_unit() {
this->push_exprstate
(exprstate::expect_toplevel_expression_sequence());
}
/** include next token @p tk and advance parser state.
 *
 *  The state at top of stack responds to @p tk with an action.
 *  Pop actions are resolved immediately: the popped state's IR is
 *  handed to the state below it, which answers with a new action.
 *  Any other action ends processing of this token.
 *
 *  @return expression carried by an emit action; otherwise nullptr
 *  @throws std::runtime_error if stack is empty on entry,
 *          or a pop action leaves it empty
 **/
rp<xo::ast::Expression>
parser::include_token(const token_type & tk)
{
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    if (stack_.empty()) {
        throw std::runtime_error(tostr("parser::include_token",
                                       ": parser not expecting input"
                                       "(call parser.begin_translation_unit()..?)",
                                       xtag("token", tk)));
    }

    /* stack_ is non-empty */
    expraction action = this->top_exprstate().on_input(tk);

    /* unwind: each pop reports its IR to the enclosing state,
     * until reaching a state that requires more input
     */
    while (action.action_type() == expractiontype::pop) {
        log && log(xtag("action", action));

        this->pop_exprstate();

        if (stack_.empty()) {
            throw std::runtime_error(tostr("parser::include_token",
                                           ": pop leaves empty stack"));
        }

        action = this->top_exprstate().on_exprir(action.expr_ir());
    }

    log && log(xtag("action", action));

    switch (action.action_type()) {
    case expractiontype::keep:
        return nullptr;

    case expractiontype::emit:
        return action.expr_ir().expr();

    case expractiontype::push1:
        this->push_exprstate(action.push_exs1());
        return nullptr;

    case expractiontype::push2:
        this->push_exprstate(action.push_exs1());
        this->push_exprstate(action.push_exs2());
        return nullptr;

    case expractiontype::pop:
        /* pop actions were consumed by the loop above */
    case expractiontype::invalid:
    case expractiontype::n_expractiontype:
        /* unreachable */
        break;
    }

    assert(false);
    return nullptr;
} /*include_token*/
/** print human-readable representation on @p os:
 *  stack size, then one line per stack entry starting from the bottom
 **/
void
parser::print(std::ostream & os) const {
    const std::size_t n_state = stack_.size();

    os << "<parser" << xtag("stack", n_state) << std::endl;

    std::size_t ix = 0;
    while (ix < n_state) {
        os << " [" << ix << "] " << stack_[ix] << std::endl;
        ++ix;
    }

    os << ">" << std::endl;
}
} /*namespace scm*/
} /*namespace xo*/
/* end parser.cpp */

16
utest/CMakeLists.txt Normal file
View file

@ -0,0 +1,16 @@
# xo-parser/utest/CMakeLists.txt
#
# Declares the unit test executable for xo_parser.
# Nothing is built unless ENABLE_TESTING is set.
set(UTEST_EXE utest.parser)
# parser_utest_main.cpp supplies main(); parser.test.cpp holds the test cases
set(UTEST_SRCS
parser_utest_main.cpp
parser.test.cpp)
if (ENABLE_TESTING)
xo_add_utest_executable(${UTEST_EXE} ${UTEST_SRCS})
# library under test, built by this same project
xo_self_dependency(${UTEST_EXE} xo_parser)
#xo_dependency(${UTEST_EXE} xo_ratio)
#xo_dependency(${UTEST_EXE} xo_reflectutil)
xo_external_target_dependency(${UTEST_EXE} Catch2 Catch2::Catch2)
endif()
# end CMakeLists.txt

199
utest/parser.test.cpp Normal file
View file

@ -0,0 +1,199 @@
/* file parser.test.cpp
*
* author: Roland Conybeare
*/
#include "xo/parser/parser.hpp"
#include <catch2/catch.hpp>
namespace xo {
using parser_type = xo::scm::parser;
using token_type = parser_type::token_type;
using xo::scm::exprstatetype;
using std::cerr;
using std::endl;
//using xo::ast::Expression;
namespace ut {
/* Feeds the tokens of
 *   def foo : footype = 3.14159265
 * to the parser one at a time, checking the parser stack after each token.
 * Only the final token is expected to produce an expression.
 */
TEST_CASE("parser", "[parser]") {
parser_type parser;
parser.begin_translation_unit();
REQUIRE(parser.stack_size() == 1);
REQUIRE(parser.i_exstype(0)
== exprstatetype::expect_toplevel_expression_sequence);
/* input:
* def
*/
{
auto r1 = parser.include_token(token_type::def());
REQUIRE(r1.get() == nullptr);
/* stack should be (bottom first; i_exstype(0) is the top):
*
* expect_toplevel_expression_sequence
* def_0
* expect_symbol
*/
CHECK(parser.stack_size() == 3);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::expect_symbol);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1) == exprstatetype::def_0);
if (parser.stack_size() > 2)
CHECK(parser.i_exstype(2)
== exprstatetype::expect_toplevel_expression_sequence);
}
/* input:
* def foo
* ^ ^
* 0 1
*/
{
auto r2 = parser.include_token(token_type::symbol_token("foo"));
cerr << "parser state after [def foo]" << endl;
cerr << parser << endl;
REQUIRE(r2.get() == nullptr);
/* stack should be:
*
* expect_toplevel_expression_sequence
* def_1
*/
CHECK(parser.stack_size() == 2);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::def_1);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1)
== exprstatetype::expect_toplevel_expression_sequence);
}
/* input:
* def foo :
* ^ ^
* 0 1
*/
{
auto r3 = parser.include_token(token_type::colon());
cerr << "parser state after [def foo :]" << endl;
cerr << parser << endl;
REQUIRE(r3.get() == nullptr);
/* stack should be:
*
* expect_toplevel_expression_sequence
* def_2
* expect_symbol
*/
CHECK(parser.stack_size() == 3);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::expect_symbol);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1) == exprstatetype::def_2);
if (parser.stack_size() > 2)
CHECK(parser.i_exstype(2)
== exprstatetype::expect_toplevel_expression_sequence);
}
/* input:
* def foo : footype
* ^ ^
* 0 1
*/
{
auto r4 = parser.include_token(token_type::symbol_token("footype"));
cerr << "parser state after [def foo : footype]" << endl;
cerr << parser << endl;
REQUIRE(r4.get() == nullptr);
CHECK(parser.stack_size() == 2);
/* stack should be:
*
* expect_toplevel_expression_sequence
* def_3
*/
CHECK(parser.stack_size() == 2);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::def_3);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1)
== exprstatetype::expect_toplevel_expression_sequence);
/* expecting either:
* = rhs-expression
* new-expression
*/
}
/* input:
* def foo : footype =
* ^ ^
* 0 1
*/
{
auto r5 = parser.include_token(token_type::singleassign());
cerr << "parser state after [def foo : footype =]" << endl;
cerr << parser << endl;
REQUIRE(r5.get() == nullptr);
CHECK(parser.stack_size() == 3);
/* stack should be
*
* expect_toplevel_expression_sequence
* def_4
* expect_rhs_expression
*/
CHECK(parser.stack_size() == 3);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::expect_rhs_expression);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1) == exprstatetype::def_4);
if (parser.stack_size() > 2)
CHECK(parser.i_exstype(2)
== exprstatetype::expect_toplevel_expression_sequence);
}
/* input:
* def foo : footype = 3.14159265
* ^ ^
* 0 1
*/
{
auto r6 = parser.include_token(token_type::f64_token("3.14159265"));
cerr << "parser state after [def foo : footype = 3.14159265]" << endl;
cerr << parser << endl;
/* literal completes the definition: expression expected this time */
REQUIRE(r6.get() != nullptr);
CHECK(parser.stack_size() == 1);
/* stack should be
*
* expect_toplevel_expression_sequence
*/
CHECK(parser.stack_size() == 1);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0)
== exprstatetype::expect_toplevel_expression_sequence);
}
} /*TEST_CASE(parser)*/
} /*namespace ut*/
} /*namespace xo*/
/* end parser.test.cpp */

View file

@ -0,0 +1,6 @@
/* file parser_utest_main.cpp */
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
/* end parser_utest_main.cpp */