xo-parser: prep: semicolon expr separator, prep for infix ops

This commit is contained in:
Roland Conybeare 2024-08-06 23:09:05 -04:00
commit 37268113fb
6 changed files with 409 additions and 60 deletions

View file

@ -30,6 +30,8 @@ namespace xo {
expect_symbol,
expect_type,
expr_progress,
n_exprstatetype
};
@ -58,18 +60,29 @@ namespace xo {
public:
exprstate() = default;
exprstate(exprstatetype exs_type,
rp<DefineExprAccess> def_expr = nullptr)
rp<Expression> candidate_expr,
rp<DefineExprAccess> def_expr)
: exs_type_{exs_type},
gen_expr_{std::move(candidate_expr)},
def_expr_{std::move(def_expr)} {}
static exprstate expect_toplevel_expression_sequence() {
return exprstate(exprstatetype::expect_toplevel_expression_sequence);
return exprstate(exprstatetype::expect_toplevel_expression_sequence, nullptr, nullptr);
}
static exprstate def_0() {
return exprstate(exprstatetype::def_0);
static exprstate expect_rhs_expression() {
return exprstate(exprstatetype::expect_rhs_expression, nullptr, nullptr);
}
static exprstate expect_symbol() {
return exprstate(exprstatetype::expect_symbol);
return exprstate(exprstatetype::expect_symbol, nullptr, nullptr);
}
/** factory: parsing state of type exprstatetype::expect_type,
 *  with neither a generic expression nor a define-expression attached
 **/
static exprstate expect_type() {
    constexpr exprstatetype c_exs_type = exprstatetype::expect_type;

    return exprstate(c_exs_type, nullptr, nullptr);
}
/** factory: parsing state of type exprstatetype::expr_progress,
 *  holding @p expr as its generic expression (gen_expr_).
 *  No define-expression is attached.
 **/
static exprstate make_expr_progress(rp<Expression> expr) {
    /* expr is a by-value sink parameter:
     * hand it on with std::move instead of copying the pointer again
     */
    return exprstate(exprstatetype::expr_progress, std::move(expr), nullptr);
}
/** factory: parsing state of type exprstatetype::def_0,
 *  using @p def_expr as scaffold for the define-expression (def_expr_).
 *  No generic expression is attached.
 **/
static exprstate def_0(rp<DefineExprAccess> def_expr) {
    /* def_expr is a by-value sink parameter:
     * hand it on with std::move instead of copying the pointer again
     */
    return exprstate(exprstatetype::def_0, nullptr, std::move(def_expr));
}
exprstatetype exs_type() const { return exs_type_; }
@ -82,8 +95,14 @@ namespace xo {
bool admits_symbol() const;
/** true iff this parsing state admits a colon as next token **/
bool admits_colon() const;
/** true iff this parsing state admits a semicolon as next token **/
bool admits_semicolon() const;
/** true iff this parsing state admits a singleassign '=' as next token **/
bool admits_singleassign() const;
#ifdef NOT_YET
/** true iff this parsing state admits a leftparen '(' as next token **/
bool admits_leftparen() const;
#endif
/** true iff this parsing state admits a 64-bit floating point literal token **/
bool admits_f64() const;
@ -112,16 +131,23 @@ namespace xo {
exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
void on_colon(exprstatestack * p_stack);
void on_semicolon(exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
void on_singleassign(exprstatestack * p_stack);
#ifdef NOT_YET
void on_leftparen(exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
#endif
void on_f64(const token_type & tk,
exprstatestack * p_stack,
rp<Expression> * p_emit_expr);
private:
/**
* def foo : f64 = 1
* ^ ^ ^ ^ ^ ^ ^
* | | | | | | (done)
* def foo : f64 = 1 ;
* ^ ^ ^ ^ ^ ^ ^ ^
* | | | | | | | (done)
* | | | | | | ??
* | | | | | def_4:expect_rhs_expression
* | | | | def_3
* | | | def_2:expect_type
@ -139,6 +165,8 @@ namespace xo {
**/
exprstatetype exs_type_;
/** generic expression **/
rp<Expression> gen_expr_;
/** scaffold a define-expression here **/
rp<DefineExprAccess> def_expr_;
/** scaffold a convert-expression here.
@ -199,10 +227,10 @@ namespace xo {
* decltype point
*
* // forward declarations
* decl pi : f64
* decl fib(n : i32) -> i32
* decl pi : f64;
* decl fib(n : i32) -> i32;
*
* def pi = 3.14159265 // constant. = is single assignment
* def pi = 3.14159265; // constant. = is single assignment
*
* def fib(n : i32) -> i32 {
* // nested defs ok
@ -211,33 +239,37 @@ namespace xo {
* // (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
* //
* if (n == 0) {
* s1
* s1;
* } else {
* aux(n - 1, s1 + s2, s1)
* aux(n - 1, s1 + s2, s1);
* }
*
* // or:
* // if (n == 0) ? s1 : aux(n - 1, s1 + s2, s1)
* }
*
* aux(n=n, s1=1, s2=0)
* aux(n=n, s1=1, s2=0);
* }
*
* def anotherfib = lambda(n : i32) { fib(n) }
* def x := "fu"; // non-constant
* x += "bar";
*
* def any : object
* def l : list<object> = '()
* def anotherfib = lambda(n : i32) { fib(n) };
*
* deftype point :: {x : f64, y : f64}
* deftype polar :: {arg : f64, mag : f64}
* def any : object;
* def l : list<object> = '();
*
* deftype point :: {x : f64, y : f64};
* deftype polar :: {arg : f64, mag : f64};
*
* def polar2rect(pt : polar) -> point {
* point(x = pt.mag * cos(arg),
* y = pt.mag * sin(arg))
* y = pt.mag * sin(arg));
* }
*
* Grammar:
* toplevel-program = expression*
* toplevel-program = $expression(1); ..; $expression(n)
*
* type-decl = decltype $typename [<$tp1 .. $tpn>]
* expression = define-expr
* | literal-expr
@ -245,6 +277,7 @@ namespace xo {
* | apply-expr
* | if-expr
* | lambda-expr
* | arithmetic-expr
* | block
*
* define-expr = type-decl
@ -297,6 +330,23 @@ namespace xo {
* ..,
* $paramname(n) : $type(n)) body-expr
* body-expr = expression
*
* arithmetic-expr = expression binop expression
*
* binop = +
* | -
* | *
* | /
* | |
* | &
* | ^
* | ==
* | !=
* | <
* | <=
* | >=
* | >
*
**/
class parser {
public:

View file

@ -18,6 +18,9 @@ namespace xo {
using Expression = xo::ast::Expression;
using span_type = span<const char>;
/** construct result from expression @p expr (moved into expr_)
 *  and input span @p rem (copied into rem_)
 **/
reader_result(rp<Expression> expr, span_type rem)
: expr_{std::move(expr)}, rem_{rem} {}
/** parsed schematica expression **/
rp<Expression> expr_;
/** span giving text input consumed to construct expr,

View file

@ -21,7 +21,7 @@ namespace xo {
namespace scm {
const char *
exprstatetype_descr(exprstatetype x) {
switch(x) {
switch (x) {
case exprstatetype::invalid:
return "?invalid";
case exprstatetype::expect_toplevel_expression_sequence:
@ -42,6 +42,8 @@ namespace xo {
return "expect_symbol";
case exprstatetype::expect_type:
return "expect_type";
case exprstatetype::expr_progress:
return "expr_progress";
case exprstatetype::n_exprstatetype:
break;
}
@ -51,7 +53,7 @@ namespace xo {
bool
exprstate::admits_definition() const {
switch(exs_type_) {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
return true;
@ -71,6 +73,8 @@ namespace xo {
case exprstatetype::expect_symbol:
case exprstatetype::expect_type:
return false;
case exprstatetype::expr_progress:
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -82,7 +86,7 @@ namespace xo {
bool
exprstate::admits_symbol() const {
switch(exs_type_) {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
case exprstatetype::def_1:
@ -102,6 +106,9 @@ namespace xo {
/* treat symbol as typename */
return true;
case exprstatetype::expr_progress:
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -113,7 +120,7 @@ namespace xo {
bool
exprstate::admits_colon() const {
switch(exs_type_) {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
return false;
@ -132,6 +139,9 @@ namespace xo {
case exprstatetype::expect_type:
return false;
case exprstatetype::expr_progress:
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -141,14 +151,37 @@ namespace xo {
return false;
}
/* true iff this parsing state admits a semicolon as next token.
 * Only expr_progress does; every other state rejects it.
 */
bool
exprstate::admits_semicolon() const {
    switch (exs_type_) {
    case exprstatetype::expr_progress:
        /* semicolon may follow an expression that is in progress */
        return true;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
    case exprstatetype::expect_toplevel_expression_sequence:
    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
    case exprstatetype::expect_rhs_expression:
    case exprstatetype::expect_symbol:
    case exprstatetype::expect_type:
        /* semicolon not accepted in any of these states */
        break;
    }

    return false;
}
bool
exprstate::admits_singleassign() const {
switch(exs_type_) {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
/*
* def foo = 1
* def foo : f64 = 1
* def foo = 1 ;
* def foo : f64 = 1 ;
* ^ ^ ^ ^ ^ ^ ^
* | | | | | | (done)
* | | | | | def_4:expect_rhs_expression
@ -182,6 +215,9 @@ namespace xo {
case exprstatetype::expect_type:
return false;
case exprstatetype::expr_progress:
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -193,7 +229,7 @@ namespace xo {
bool
exprstate::admits_f64() const {
switch(exs_type_) {
switch (exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
case exprstatetype::def_1:
@ -209,6 +245,9 @@ namespace xo {
case exprstatetype::expect_type:
return false;
case exprstatetype::expr_progress:
return false;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -218,6 +257,61 @@ namespace xo {
return false;
}
#ifdef NOT_YET
/* true iff this parsing state admits a leftparen '(' as next token.
 * Only compiled when NOT_YET is defined.
 */
bool
exprstate::admits_leftparen() const {
    switch (exs_type_) {
    case exprstatetype::expect_toplevel_expression_sequence:
        /* input like
         *   (function(blah...))
         * not allowed at toplevel;
         * creates ambiguity e.g. consider
         *   x := foo
         *   (bar)
         *
         * is rhs 'foo' or 'foo(bar)'
         */
        return false;

    case exprstatetype::def_0:
    case exprstatetype::def_1:
    case exprstatetype::def_2:
    case exprstatetype::def_3:
    case exprstatetype::def_4:
        /* input like
         *   def foo : f64 = (
         *   ^   ^   ^ ^   ^
         *   |   |   | |   def_4
         *   |   |   | def_3
         *   |   |   def_2
         *   |   def_1
         *   def_0
         *
         * not allowed or relies on pushing another state
         */
        return false;

    case exprstatetype::expect_rhs_expression:
        /* can always begin non-toplevel expression with '(' */
        return true;

    case exprstatetype::expect_type:
        return false;

    case exprstatetype::expect_symbol:
        return false;

    case exprstatetype::expr_progress:
        /* listed explicitly so the switch covers every enumerator,
         * like the other admits_xxx() predicates.
         * Same result as the previous fall-through to 'return false'.
         * NOTE(review): call syntax such as f(a) may eventually need
         * '(' to be admitted here -- confirm when enabling NOT_YET.
         */
        return false;

    case exprstatetype::invalid:
    case exprstatetype::n_exprstatetype:
        /* unreachable */
        assert(false);
        return false;
    }

    return false;
}
#endif
void
exprstate::on_def(exprstatestack * p_stack) {
constexpr bool c_debug_flag = true;
@ -233,13 +327,13 @@ namespace xo {
xtag("state", *this)));
}
p_stack->push_exprstate(exprstate(exprstatetype::def_0,
DefineExprAccess::make_empty()));
p_stack->push_exprstate
(exprstate::def_0(DefineExprAccess::make_empty()));
/* todo: replace:
* expect_symbol_or_function_signature()
*/
p_stack->push_exprstate(exprstatetype::expect_symbol);
p_stack->push_exprstate(exprstate::expect_symbol());
/* keyword 'def' introduces a definition:
* def pi : f64 = 3.14159265
@ -255,6 +349,8 @@ namespace xo {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
log && log(xtag("exstype", p_stack->top_exprstate().exs_type()));
constexpr const char * self_name = "exprstate::on_symbol";
if (!this->admits_symbol()) {
@ -265,7 +361,7 @@ namespace xo {
xtag("state", *this)));
}
switch(this->exs_type_) {
switch (this->exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
throw std::runtime_error
(tostr(self_name,
@ -284,13 +380,44 @@ namespace xo {
return;
case exprstatetype::expect_rhs_expression:
case exprstatetype::expect_symbol:
/* have to do pop first */
{
/* various possibilities when looking for rhs expression:
*
* x := y // (1)
* x := f(a) // (2)
* x := f(a,b) // (3)
*
* need lookahead token following symbol to distinguish
* between (1) (symbol completes rhs expression)
* and {(2), (3)} (symbol is function call)
*/
/* have to do pop first, before sending symbol to
* the original symbol-requester
*/
#ifdef NOT_YET
p_stack->push_exprstate(exprstate(exprstatetype::expr_progress,
Variable::make(name, type)));
#endif
#ifdef LATER
p_stack->pop_exprstate();
p_stack->top_exprstate().on_symbol(tk.text(),
p_stack, p_emit_expr);
#endif
return;
}
case exprstatetype::expect_symbol:
{
/* have to do pop first, before sending symbol to
* the original symbol-requester
*/
p_stack->pop_exprstate();
p_stack->top_exprstate().on_symbol(tk.text(),
p_stack, p_emit_expr);
return;
}
case exprstatetype::expect_type: {
TypeDescr td = nullptr;
@ -321,20 +448,27 @@ namespace xo {
return;
}
case exprstatetype::expr_progress:
/* illegal input, e.g.
* foo bar
*/
assert(false);
return;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
assert(false);
return;
}
}
} /*on_symbol*/
void
exprstate::on_typedescr(TypeDescr td,
exprstatestack * /*p_stack*/,
rp<Expression> * /*p_emit_expr*/)
{
switch(this->exs_type_) {
switch (this->exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
case exprstatetype::def_0:
case exprstatetype::def_1:
@ -366,6 +500,10 @@ namespace xo {
assert(false);
return;
case exprstatetype::expr_progress:
assert(false);
return;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -392,7 +530,36 @@ namespace xo {
if (this->exs_type_ == exprstatetype::def_1) {
this->exs_type_ = exprstatetype::def_2;
p_stack->push_exprstate(exprstatetype::expect_type);
p_stack->push_exprstate(exprstate::expect_type());
} else {
assert(false);
}
}
void
exprstate::on_semicolon(exprstatestack * p_stack,
rp<Expression> * p_emit_expr)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
constexpr const char * self_name = "exprstate::on_semicolon";
if (!this->admits_semicolon())
{
throw std::runtime_error(tostr(self_name,
": unexpected semicolon for parsing state",
xtag("state", *this)));
}
if (this->exs_type_ == exprstatetype::expr_progress) {
rp<Expression> expr = this->gen_expr_;
p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */
p_stack->top_exprstate().on_expr(expr,
p_stack,
p_emit_expr);
} else {
assert(false);
}
@ -417,16 +584,51 @@ namespace xo {
{
this->exs_type_ = exprstatetype::def_4;
p_stack->push_exprstate(exprstatetype::expect_rhs_expression);
p_stack->push_exprstate(exprstate::expect_rhs_expression());
} else {
assert(false);
}
}
#ifdef OBSOLETE
/**
 * Handle a leftparen '(' token. Only compiled when OBSOLETE is defined.
 *
 * Consider input:
 *   x := y(foo())
 *
 * Is that an assignment x:=y followed by function call (foo())?
 * Or an assignment whose rhs calls function y() with argument foo()?
 *
 * Policy: forbid parenthesis as beginning of a toplevel expression.
 *
 * @param p_stack  parsing-state stack; receives the states pushed here
 * (second parameter, the emitted-expression slot, is not used)
 *
 * Throws std::runtime_error if admits_leftparen() is false for this state.
 **/
void
exprstate::on_leftparen(exprstatestack * p_stack,
                        rp<Expression> * /*p_emit_expr*/)
{
    constexpr bool c_debug_flag = true;
    scope log(XO_DEBUG(c_debug_flag));

    /* was referenced below without being declared */
    constexpr const char * self_name = "exprstate::on_leftparen";

    if (!this->admits_leftparen())
    {
        throw std::runtime_error(tostr(self_name,
                                       ": unexpected leftparen '(' for parsing state",
                                       xtag("state", *this)));
    }

    if (this->exs_type_ == exprstatetype::expect_rhs_expression) {
        /* push lparen_0 to remember to look for subsequent rightparen.
         * exprstate is built explicitly: its constructor takes
         * (type, candidate_expr, def_expr), so a bare exprstatetype
         * no longer converts.
         * NOTE(review): confirm exprstatetype::lparen_0 exists
         * before enabling this code.
         */
        p_stack->push_exprstate(exprstate(exprstatetype::lparen_0,
                                          nullptr, nullptr));
        p_stack->push_exprstate(exprstate::expect_rhs_expression());
    } else {
        /* admits_leftparen() should have rejected every other state */
        assert(false);
    }
}
#endif
void
exprstate::on_f64(const token_type & tk,
exprstatestack * p_stack,
rp<Expression> * p_emit_expr)
rp<Expression> * /*p_emit_expr*/)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
@ -441,11 +643,9 @@ namespace xo {
}
if (this->exs_type_ == exprstatetype::expect_rhs_expression) {
p_stack->pop_exprstate();
p_stack->top_exprstate().on_expr(Constant<double>::make(tk.f64_value()),
p_stack,
p_emit_expr);
p_stack->push_exprstate
(exprstate::make_expr_progress
(Constant<double>::make(tk.f64_value())));
} else {
assert(false);
}
@ -461,7 +661,7 @@ namespace xo {
log && log(xtag("tk", tk));
log && log(xtag("state", *this));
switch(tk.tk_type()) {
switch (tk.tk_type()) {
case tokentype::tk_def:
this->on_def(p_stack);
@ -503,10 +703,13 @@ namespace xo {
return;
case tokentype::tk_doublecolon:
case tokentype::tk_semicolon:
assert(false);
return;
case tokentype::tk_semicolon:
this->on_semicolon(p_stack, p_emit_expr);
return;
case tokentype::tk_singleassign:
this->on_singleassign(p_stack);
return;
@ -538,7 +741,13 @@ namespace xo {
exprstatestack * p_stack,
rp<Expression> * p_emit_expr)
{
switch(this->exs_type_) {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
log && log(xtag("exstype", this->exs_type_),
xtag("expr", expr));
switch (this->exs_type_) {
case exprstatetype::expect_toplevel_expression_sequence:
/* toplevel expression sequence accepts an
* arbitrary number of expressions.
@ -546,7 +755,7 @@ namespace xo {
* parser::include_token() returns
*/
*p_emit_expr = expr.get();
*p_emit_expr = expr.promote();
return;
case exprstatetype::def_0:
case exprstatetype::def_1:
@ -580,7 +789,17 @@ namespace xo {
return;
}
case exprstatetype::expect_rhs_expression:
case exprstatetype::expect_rhs_expression: {
p_stack->pop_exprstate(); /* NOT KOSHER. invalidates *this */
p_stack->top_exprstate().on_expr(expr,
p_stack,
p_emit_expr);
return;
}
case exprstatetype::expect_type:
case exprstatetype::expect_symbol:
/* unreachable
@ -588,13 +807,18 @@ namespace xo {
*/
assert(false);
return;
case exprstatetype::expr_progress:
/* consecutive expressions aren't legal
*/
assert(false);
return;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
assert(false);
return;
}
}
} /*on_expr*/
void
exprstate::on_symbol(const std::string & symbol_name,
@ -634,6 +858,9 @@ namespace xo {
*/
assert(false);
return;
case exprstatetype::expr_progress:
assert(false);
return;
case exprstatetype::invalid:
case exprstatetype::n_exprstatetype:
/* unreachable */
@ -645,9 +872,11 @@ namespace xo {
void
exprstate::print(std::ostream & os) const {
os << "<exprstate"
<< xtag("type", exs_type_)
<< xtag("def_expr", def_expr_)
<< xtag("cvt_expr", cvt_expr_);
<< xtag("type", exs_type_);
if (def_expr_)
os << xtag("def_expr", def_expr_);
if (cvt_expr_)
os << xtag("cvt_expr", cvt_expr_);
os << ">";
}
@ -667,6 +896,10 @@ namespace xo {
void
exprstatestack::push_exprstate(const exprstate & exs) {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag),
xtag("exs", exs));
std::size_t z = stack_.size();
stack_.resize(z+1);
@ -676,6 +909,10 @@ namespace xo {
void
exprstatestack::pop_exprstate() {
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag),
xtag("top.exstype", top_exprstate().exs_type()));
std::size_t z = stack_.size();
if (z > 0)
@ -714,7 +951,7 @@ namespace xo {
parser::include_token(const token_type & tk)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
scope log(XO_DEBUG(c_debug_flag), xtag("tk", tk));
if (xs_stack_.empty()) {
throw std::runtime_error(tostr("parser::include_token",
@ -729,6 +966,7 @@ namespace xo {
xs_stack_.top_exprstate().on_input(tk, &xs_stack_, &retval);
log && log(xtag("retval", retval));
return retval;
} /*include_token*/

View file

@ -17,6 +17,9 @@ namespace xo {
reader_result
reader::read_expr(const span_type & input_arg, bool eof)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag));
span_type input = input_arg;
/* input text-span consumed by this call.
@ -32,7 +35,13 @@ namespace xo {
const auto & tk = sr.first;
const span_type & used_span = sr.second;
log && log(xtag("used_span", used_span));
log && log(xtag("input.pre", input));
input = input.after_prefix(used_span);
log && log(xtag("expr_span.pre", expr_span));
expr_span += used_span;
if (tk.is_valid()) {
@ -40,6 +49,9 @@ namespace xo {
auto expr = this->parser_.include_token(tk);
if (expr) {
log && log(xtag("outcome", "victory!"),
xtag("expr", expr));
/* token completes an expression -> victory */
return reader_result(expr, expr_span);
} else {
@ -48,7 +60,6 @@ namespace xo {
*
* input span may contain more tokens -> iterate
*/
input = input.after_prefix(used_span);
}
} else {
assert(input.empty());
@ -76,6 +87,8 @@ namespace xo {
}
}
log && log(xtag("outcome", "noop"));
return reader_result(nullptr, expr_span);
}

View file

@ -201,7 +201,43 @@ namespace xo {
cerr << "parser state after [def foo : f64 = 3.14159265]" << endl;
cerr << parser << endl;
REQUIRE(r6.get() != nullptr);
REQUIRE(r6.get() == nullptr);
/* stack should be (top first):
*
* expr_progress
* expect_rhs_expression
* def_4
* expect_toplevel_expression_sequence
*/
CHECK(parser.stack_size() == 4);
if (parser.stack_size() > 0)
CHECK(parser.i_exstype(0) == exprstatetype::expr_progress);
if (parser.stack_size() > 1)
CHECK(parser.i_exstype(1) == exprstatetype::expect_rhs_expression);
if (parser.stack_size() > 2)
CHECK(parser.i_exstype(2) == exprstatetype::def_4);
if (parser.stack_size() > 3)
CHECK(parser.i_exstype(3)
== exprstatetype::expect_toplevel_expression_sequence);
}
/* input:
*
* i_tc==0:
* def foo = 3.14159265 ;
* ^ ^
* 0 1
*
* i_tc==1:
* def foo : f64 = 3.14159265 ;
* ^ ^
* 0 1
*/
{
auto r7 = parser.include_token(token_type::semicolon());
cerr << "parser state after [def foo : f64 = 3.14159265;]" << endl;
cerr << parser << endl;
REQUIRE(r7.get() != nullptr);
CHECK(parser.stack_size() == 1);

View file

@ -18,11 +18,20 @@ namespace xo {
rdr.begin_translation_unit();
try {
auto rr = rdr.read_expr(reader::span_type::from_cstr("def foo : f64 = 3.14159265"),
true /*eof*/);
auto input
= reader::span_type::from_cstr("def foo : f64 = 3.14159265;");
auto rr
= rdr.read_expr(input, true /*eof*/);
REQUIRE(rr.expr_.get());
REQUIRE(rr.rem_.empty());
log && log(xtag("expr", rr.expr_));
input = input.after_prefix(rr.rem_);
log && log(xtag("post.input", input));
REQUIRE(input.empty());
} catch (std::exception & ex) {
log && log(ex.what());