xo-expression xo-reader: parser improvements, prep type inf/unify

This commit is contained in:
Roland Conybeare 2025-07-28 09:12:41 -04:00
commit f3fde735da
18 changed files with 274 additions and 45 deletions

View file

@ -6,7 +6,8 @@
#pragma once
#include "xo/refcnt/Refcounted.hpp"
#include "xo/reflect/TypeDescr.hpp"
#include "xo/expression/typeinf/type_ref.hpp"
//#include "xo/reflect/TypeDescr.hpp"
#include "exprtype.hpp"
namespace xo {
@ -20,16 +21,25 @@ namespace xo {
**/
class GeneralizedExpression : public ref::Refcount {
public:
using type_ref = xo::scm::type_ref;
using prefix_type = xo::scm::prefix_type;
using TypeDescr = xo::reflect::TypeDescr;
using ppstate = xo::print::ppstate;
using ppindentinfo = xo::print::ppindentinfo;
public:
GeneralizedExpression(exprtype extype, TypeDescr valuetype)
: extype_{extype}, valuetype_{valuetype}{}
/** if @p valuetype is null, generate unique type variable
* using prefix derived from @p extype.
**/
GeneralizedExpression(exprtype extype, TypeDescr valuetype);
/** if @p valuetype is null, generate unique type variable
* name, beginning with @p prefix
**/
GeneralizedExpression(exprtype extype, prefix_type prefix, TypeDescr valuetype);
exprtype extype() const { return extype_; }
TypeDescr valuetype() const { return valuetype_; }
const type_ref & valuetype_ref() const { return valuetype_ref_; }
TypeDescr valuetype() const { return valuetype_ref_.td(); }
/** write human-readable representation to stream @p os **/
virtual void display(std::ostream & os) const = 0;
@ -39,7 +49,7 @@ namespace xo {
virtual std::uint32_t pretty_print(const ppindentinfo & ppii) const = 0;
/** useful when scaffolding expressions in a parser **/
void assign_valuetype(TypeDescr x) { valuetype_ = x; }
void assign_valuetype(TypeDescr x) { valuetype_ref_.resolve_to(x); }
private:
/** expression type (constant | apply | ..) for this expression **/
@ -47,7 +57,7 @@ namespace xo {
/** type information (when available) for values produced by this
* expression.
**/
TypeDescr valuetype_ = nullptr;
type_ref valuetype_ref_;
};
inline std::ostream &

View file

@ -22,6 +22,17 @@ namespace xo {
**/
class Lambda : public FunctionInterface {
public:
/**
* @p name. Name for this lambda -- must be unique
* @p lambda_type. Function signature
* @p local_env. Environment with formals as content
* @p body. Expression for lambda function body
**/
static rp<Lambda> make(const std::string & name,
TypeDescr lambda_type,
const rp<LocalEnv> & local_env,
const rp<Expression> & body);
/**
* @p name Name for this lambda -- must be unique
* @p argv Formal parameters, in left-to-right order
@ -43,6 +54,21 @@ namespace xo {
TypeDescr explicit_return_td,
const rp<Expression> & body);
/** create type description for lambda with arguments @p argv
* and return type @p return_td
**/
static TypeDescr assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr return_td);
/** create type description for lambda with arguments @p argv
* and body expression @p body.
* @p explicit_return_td will be used if non-null.
* otherwise use @p body valuetype
**/
static TypeDescr assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr explicit_return_td,
const rp<Expression> & body);
/** downcast from Expression **/
static bp<Lambda> from(bp<Expression> x) {
return bp<Lambda>::from(x);
@ -102,13 +128,6 @@ namespace xo {
virtual std::uint32_t pretty_print(const ppindentinfo & ppii) const override;
protected:
/** create type description for lambda with arguments @p argv
* and body expression @p body
**/
static TypeDescr assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr explicit_return_td,
const rp<Expression> & body);
/** create string description for function signature,
* consistent with c++ expectation
**/

View file

@ -13,7 +13,8 @@ namespace xo {
/** @class type_ref
* @brief name and eventual resolution for type associated with an expression.
*
* Type inference / unification operates on @ref xo::scm::TypeTemplate instances, see also.
* Type inference / unification operates on
* @ref xo::scm::TypeBlueprint instances, see also.
**/
class type_ref {
public:
@ -23,6 +24,12 @@ namespace xo {
type_ref() = default;
type_ref(const type_var& id, TypeDescr td);
/** if type not determined (@p td is nullptr),
* -> generate and store type variable name.
* otherwise type is already resolved
**/
static type_ref dwim(prefix_type prefix, TypeDescr td);
/** generate a unique type-variable name, that begins with @p prefix **/
static type_var generate_unique(prefix_type prefix);

View file

@ -49,6 +49,9 @@ namespace xo {
**/
unify_result unify(bp<TypeBlueprint> lhs, bp<TypeBlueprint> rhs);
/** lookup type variable by @p name, to get resolution **/
rp<TypeBlueprint> lookup(const type_var & name) const;
private:
type_substitution_map constraint_map_;
};

View file

@ -64,6 +64,7 @@ namespace xo {
DefineExpr::pretty_print(const ppindentinfo & ppii) const
{
return ppii.pps()->pretty_struct(ppii, "Define",
//refrtag("type", this->valuetype()), // need pretty
refrtag("name", lhs_var_->name()),
refrtag("rhs", rhs_));
}

View file

@ -6,6 +6,43 @@
namespace xo {
namespace ast {
namespace {
using xo::scm::prefix_type;
/** Map expression kind @p x to the short prefix used when generating
 *  a type-variable name for an expression of that kind.
 *
 *  The sentinel values exprtype::invalid and exprtype::n_expr are not
 *  expected here: they trip an assert, and (when asserts are compiled out)
 *  fall through to the generic "?expr" prefix.
 **/
prefix_type exprtype2prefix(exprtype x)
{
    switch (x) {
    case exprtype::constant:
        return prefix_type::from_chars("k");
    case exprtype::primitive:
        return prefix_type::from_chars("pm");
    case exprtype::define:
        return prefix_type::from_chars("def");
    case exprtype::assign:
        return prefix_type::from_chars("=");
    case exprtype::apply:
        return prefix_type::from_chars("@");
    case exprtype::lambda:
        return prefix_type::from_chars("lm");
    case exprtype::variable:
        return prefix_type::from_chars("var");
    case exprtype::ifexpr:
        return prefix_type::from_chars("if");
    case exprtype::sequence:
        return prefix_type::from_chars("seq");
    case exprtype::convert:
        return prefix_type::from_chars("cvt");
    case exprtype::invalid:
    case exprtype::n_expr:
        /* sentinels, not real expression kinds */
        assert(false);
        break;
    }

    /* reached only for sentinel / out-of-range values */
    return prefix_type::from_chars("?expr");
}
}
/** Construct with the type-variable prefix derived from @p extype.
 *  Delegates to the explicit-prefix constructor, which performs
 *  the actual member initialization.
 **/
GeneralizedExpression::GeneralizedExpression(exprtype extype,
                                             TypeDescr valuetype)
    : GeneralizedExpression(extype, exprtype2prefix(extype), valuetype)
{}
/** Construct with an explicit type-variable @p prefix.
 *  The value type reference is built by type_ref::dwim() from
 *  @p prefix and @p valuetype.
 **/
GeneralizedExpression::GeneralizedExpression(exprtype extype,
                                             prefix_type prefix,
                                             TypeDescr valuetype)
    : extype_(extype),
      valuetype_ref_(type_ref::dwim(prefix, valuetype))
{}
std::string
GeneralizedExpression::display_string() const {
return tostr(*this);

View file

@ -20,16 +20,9 @@ namespace xo {
namespace ast {
TypeDescr
Lambda::assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr explicit_return_td,
const rp<Expression> & body)
TypeDescr return_td)
{
if (!body)
return nullptr;
/** assemble function type.
*
* NOTE: need this to be unique!
**/
assert(return_td != nullptr);
std::vector<TypeDescr> arg_td_v;
{
@ -40,6 +33,25 @@ namespace xo {
}
}
auto function_info
= FunctionTdxInfo(return_td,
arg_td_v,
false /*!is_noexcept*/);
TypeDescr lambda_td
= TypeDescrBase::require_by_fn_info(function_info);
return lambda_td;
}
TypeDescr
Lambda::assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr explicit_return_td,
const rp<Expression> & body)
{
if (!body)
return nullptr;
if (explicit_return_td && body->valuetype() && (explicit_return_td != body->valuetype())) {
throw std::runtime_error(tostr("explicit lambda return type T1 conflicts with lambda body T2",
xtag("T1", explicit_return_td),
@ -48,15 +60,9 @@ namespace xo {
// TODO: unify(explicit_return_td, body->valuetype())
auto function_info
= FunctionTdxInfo(explicit_return_td ? explicit_return_td : body->valuetype(),
arg_td_v,
false /*!is_noexcept*/);
TypeDescr return_td = explicit_return_td ? explicit_return_td : body->valuetype();
TypeDescr lambda_td
= TypeDescrBase::require_by_fn_info(function_info);
return lambda_td;
return assemble_lambda_td(argv, return_td);
}
std::string
@ -78,6 +84,15 @@ namespace xo {
return ss.str();
}
/** Create a lambda from an already-assembled function signature.
 *
 *  @p name       name for this lambda
 *  @p lambda_td  function signature
 *  @p env        environment holding the lambda's formals
 *  @p body       expression for the lambda body
 **/
rp<Lambda>
Lambda::make(const std::string & name,
             TypeDescr lambda_td,
             const rp<LocalEnv> & env,
             const rp<Expression> & body)
{
    rp<Lambda> lm(new Lambda(name, lambda_td, env, body));

    return lm;
}
rp<Lambda>
Lambda::make_from_env(const std::string & name,
const rp<LocalEnv> & env,

View file

@ -11,6 +11,18 @@ namespace xo {
bool type_ref::is_concrete() const { return td_ != nullptr; }
/** Build a type_ref for an expression whose type description is @p td.
 *
 *  - @p td non-null: type is already resolved, so the type variable
 *    is left default-constructed.
 *  - @p td null: type is not yet resolved, so a unique type variable
 *    beginning with @p prefix is generated.
 **/
type_ref
type_ref::dwim(prefix_type prefix, TypeDescr td)
{
    /* generate_unique() is only invoked on the unresolved path */
    type_var id = (td ? type_var() : generate_unique(prefix));

    return type_ref(id, td);
}
auto
type_ref::generate_unique(xo::scm::prefix_type prefix) -> xo::scm::type_var
{

View file

@ -212,6 +212,18 @@ namespace xo {
};
}
/** Look up the entry recorded for type variable @p name in the constraint map.
 *
 *  @return the mapped blueprint, or nullptr if @p name has no entry.
 **/
rp<TypeBlueprint>
type_unifier::lookup(const type_var & name) const
{
    auto pos = constraint_map_.find(name);

    if (pos == constraint_map_.end()) {
        /* nothing recorded for this type variable */
        return nullptr;
    }

    return pos->second;
}
} /*namespace scm*/
} /*namespace xo*/

View file

@ -77,6 +77,7 @@ namespace xo {
parserstatemachine * p_psm) override;
virtual void print(std::ostream & os) const override;
virtual bool pretty_print(const xo::print::ppindentinfo & ppii) const override;
private:
static std::unique_ptr<expect_expr_xs> make(bool allow_defs,

View file

@ -73,6 +73,8 @@ namespace xo {
parserstatemachine * p_psm) override;
virtual void on_semicolon_token(const token_type & tk,
parserstatemachine * p_psm) override;
virtual void on_rightbrace_token(const token_type & tk,
parserstatemachine * p_psm) override;
virtual void on_expr(bp<Expression> expr,
parserstatemachine * p_psm) override;
@ -84,6 +86,11 @@ namespace xo {
private:
static std::unique_ptr<if_else_xs> make();
/** exit this exprstate,
* and deliver @ref if_expr_ to parent exprstate
**/
void finish_and_continue(parserstatemachine * p_psm);
private:
ifexprstatetype ifxs_type_;
/** scaffold output expression here **/

View file

@ -98,6 +98,9 @@ namespace xo {
/** explicit return type (if supplied) **/
TypeDescr explicit_return_td_ = nullptr;
/** lambda signature (when known) **/
TypeDescr lambda_td_ = nullptr;
/** body expression **/
rp<Expression> body_;

View file

@ -9,6 +9,7 @@
#include "exprstatestack.hpp"
#include "envframestack.hpp"
#include "parser_result.hpp"
#include "xo/expression/typeinf/type_unifier.hpp"
namespace xo {
namespace scm {
@ -39,7 +40,11 @@ namespace xo {
std::unique_ptr<exprstate> pop_exprstate();
exprstate & top_exprstate();
/** get exprstate @p i levels from the top **/
const exprstate & lookup_exprstate(size_t i) const;
void push_exprstate(std::unique_ptr<exprstate> x);
/** @return number of stacked expression states **/
size_t exprstate_stack_size() const { return xs_stack_.size(); }
bool debug_flag() const { return debug_flag_; }
@ -55,6 +60,8 @@ namespace xo {
/** @return available variable bindings in current parsing state **/
bp<LocalEnv> top_envframe() const;
/** @return frame @p i levels from the top **/
bp<LocalEnv> lookup_envframe(std::size_t i) const;
/** push frame @p x (with new variable bindings) onto environment stack **/
void push_envframe(const rp<LocalEnv> & x);
/** @return pop innermost environment frame and return it **/
@ -102,7 +109,11 @@ namespace xo {
* pop when lambda body goes out of scope
**/
envframestack env_stack_;
/** parser result state **/
/** type inference/unification.
* apply to unresolved types associated with @ref Expression instances.
**/
type_unifier unifier_;
/** parser result state (as of last call to @ref exprstate::on_input) **/
parser_result result_;
/** enable/disable debug logging **/
bool debug_flag_ = false;

View file

@ -261,6 +261,14 @@ namespace xo {
<< ">";
}
/** Pretty-print this state as a struct named "expect_expr_xs",
 *  showing the allow_defs and cxl_on_rightbrace flags.
 *
 *  @return whatever pretty_struct() reports for this output.
 **/
bool
expect_expr_xs::pretty_print(const xo::print::ppindentinfo & ppii) const
{
    auto * pps = ppii.pps();

    return pps->pretty_struct(ppii, "expect_expr_xs",
                              refrtag("allow_defs", allow_defs_),
                              refrtag("cxl_on_rightbrace", cxl_on_rightbrace_));
}
} /*namespace scm*/
} /*namespace xo*/

View file

@ -164,6 +164,30 @@ namespace xo {
this->illegal_input_on_token(c_self_name, tk, exp, p_psm);
}
/** Pop an expression state off @p p_psm's stack and hand the scaffolded
 *  if-expression to the state that is then on top.
 *
 *  Statement order matters here:
 *  1. take our own reference to if_expr_ before popping
 *  2. pop; the popped state is held in a local until this function returns
 *     (presumably the popped state is this object -- confirm against callers)
 *  3. deliver the expression via on_expr() on the new top state
 **/
void
if_else_xs::finish_and_continue(parserstatemachine * p_psm)
{
/* keep the expression alive independently of this state */
rp<IfExprAccess> if_expr = this->if_expr_;
/* hold the popped state until function exit; members are still read below */
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
if (this->ifxs_type_ == ifexprstatetype::if_4) {
/* if no else-branch, then if-expr can't have valuetype */
if_expr->assign_valuetype(nullptr);
}
/* report finished if-expression to the state now on top of the stack */
p_psm->top_exprstate().on_expr(if_expr, p_psm);
}
/** Handle a right-brace token: finish this if-expression,
 *  then pass the same token @p tk back to @p p_psm for further handling.
 **/
void
if_else_xs::on_rightbrace_token(const token_type & tk,
parserstatemachine * p_psm)
{
scope log(XO_DEBUG(p_psm->debug_flag()));
/* pops an exprstate and delivers if_expr_ to the next state on the stack */
this->finish_and_continue(p_psm);
/* NOTE(review): after finish_and_continue() this object has presumably
 * been destroyed; only p_psm and tk are used from here on -- confirm
 */
p_psm->on_rightbrace_token(tk);
}
void
if_else_xs::on_semicolon_token(const token_type & tk,
parserstatemachine * p_psm)
@ -180,24 +204,18 @@ namespace xo {
case ifexprstatetype::n_ifexprstatetype:
// unreachable
assert(false);
return;
break;
case ifexprstatetype::if_1:
case ifexprstatetype::if_2:
case ifexprstatetype::if_3:
case ifexprstatetype::if_5:
this->illegal_input_on_token(c_self_name, tk, get_expect_str(), p_psm);
return;
break;
case ifexprstatetype::if_4:
case ifexprstatetype::if_6: {
rp<IfExprAccess> if_expr = this->if_expr_;
std::unique_ptr<exprstate> self = p_psm->pop_exprstate();
TypeDescr td = nullptr;
if_expr->assign_valuetype(td);
p_psm->top_exprstate().on_expr(if_expr, p_psm);
return;
this->finish_and_continue(p_psm);
break;
}
}
}

View file

@ -1,6 +1,7 @@
/* @file lambda_xs.cpp */
#include "lambda_xs.hpp"
#include "define_xs.hpp"
#include "parserstatemachine.hpp"
#include "exprstatestack.hpp"
#include "expect_formal_arglist_xs.hpp"
@ -156,9 +157,55 @@ namespace xo {
{
constexpr const char * c_self_name = "lambda_xs::on_typedescr";
assert(td);
if (lmxs_type_ == lambdastatetype::lm_3) {
this->lmxs_type_ = lambdastatetype::lm_4;
this->explicit_return_td_ = td;
this->lambda_td_ = Lambda::assemble_lambda_td(local_env_->argv(),
explicit_return_td_);
/* 1. at this point we know function signature (@ref lambda_td_)
* 2. if this lambda appears on the rhs of a define,
* propagate function signature to the define.
* 3. this makes recursive function definitions like this work
* without relying on type inference:
* def fact = lambda (n : i64) : i64 {
* if (n == 0) then
* 1
* else
* n * fact(n - 1)
* }
* 4. while parsing the body of the lambda, we want environment
* to already associate the lambda's signature with variable 'fact',
* so that when parser encounters 'fact(n - 1)' the expression has
* known valuetype.
*/
if ((p_psm->exprstate_stack_size() >= 3)
&& (p_psm->lookup_exprstate(1).exs_type() == exprstatetype::expect_rhs_expression)
&& (p_psm->lookup_exprstate(2).exs_type() == exprstatetype::defexpr)
&& (p_psm->env_stack_size() >= 2)
)
{
bp<LocalEnv> def_env = p_psm->lookup_envframe(1);
assert(def_env->n_arg() == 1);
bp<Variable> def_var = def_env->lookup_arg(0).get();
if (def_var->valuetype() == nullptr) {
def_var->assign_valuetype(lambda_td_);
} else {
/* don't need to unify here. if def already has a type,
* that's because it was explicitly specified.
* will discover any conflict after reporting parsed lambda
* to define_xs
*/
}
}
expect_expr_xs::start(p_psm);
/* control reenters via .on_expr() or .on_expr_with_semicolon() */
} else {
@ -194,7 +241,16 @@ namespace xo {
/* top env frame recorded arguments to this lambda */
p_psm->pop_envframe();
rp<Lambda> lm = Lambda::make_from_env(name, local_env_, explicit_return_td_, body_);
rp<Lambda> lm;
/* TODO: unify explicit_return_td_ with body_ */
if (lambda_td_) {
lm = Lambda::make(name, lambda_td_, local_env_, body_);
} else {
lm = Lambda::make_from_env(name, local_env_,
explicit_return_td_, body_);
}
p_psm->top_exprstate().on_expr(lm, p_psm);
p_psm->top_exprstate().on_semicolon_token(tk, p_psm);

View file

@ -52,8 +52,7 @@ namespace xo {
const parser_result &
parser::include_token(const token_type & tk)
{
constexpr bool c_debug_flag = true;
scope log(XO_DEBUG(c_debug_flag), xtag("tk", tk));
scope log(XO_DEBUG(psm_.debug_flag()), xtag("tk", tk));
if (psm_.xs_stack_.empty()) {
throw std::runtime_error(tostr("parser::include_token",

View file

@ -35,6 +35,11 @@ namespace xo {
return xs_stack_.top_exprstate();
}
/** Read-only access to the expression state at index @p i of the
 *  expression-state stack.
 *
 *  NOTE(review): assumes xs_stack_[i] counts levels down from the top of
 *  the stack, and that @p i is in range; no bounds check is done here -- confirm.
 **/
const exprstate &
parserstatemachine::lookup_exprstate(size_t i) const {
return *xs_stack_[i];
}
void
parserstatemachine::push_exprstate(std::unique_ptr<exprstate> x) {
xs_stack_.push_exprstate(std::move(x));
@ -45,6 +50,11 @@ namespace xo {
return env_stack_.top_envframe();
}
/** Access the environment frame at index @p i of the environment stack.
 *
 *  NOTE(review): assumes env_stack_[i] counts levels down from the top of
 *  the stack, and that @p i is in range; no bounds check is done here -- confirm.
 **/
bp<LocalEnv>
parserstatemachine::lookup_envframe(std::size_t i) const {
return env_stack_[i];
}
void
parserstatemachine::push_envframe(const rp<LocalEnv> & x) {
constexpr bool c_debug_flag = true;