xo-expression xo-reader: type unifier + misc improvements

This commit is contained in:
Roland Conybeare 2025-07-26 17:28:41 -04:00
commit 75b74918b7
31 changed files with 1005 additions and 76 deletions

View file

@ -15,10 +15,15 @@ set(SELF_SRCS
LocalEnv.cpp
ConvertExpr.cpp
Primitive.cpp
typeinf/type_ref.cpp
typeinf/type_unifier.cpp
typeinf/TypeBlueprint.cpp
)
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
# note: deps here must also appear in cmake/xo_expressionConfig.cmake.in
xo_dependency(${SELF_LIB} reflect)
xo_dependency(${SELF_LIB} xo_flatstring)
#xo_dependency(${SELF_LIB} indentlog)
#xo_dependency(${SELF_LIB} subsys)

View file

@ -20,6 +20,7 @@ namespace xo {
namespace ast {
TypeDescr
Lambda::assemble_lambda_td(const std::vector<rp<Variable>> & argv,
TypeDescr explicit_return_td,
const rp<Expression> & body)
{
if (!body)
@ -39,8 +40,16 @@ namespace xo {
}
}
if (explicit_return_td && body->valuetype() && (explicit_return_td != body->valuetype())) {
throw std::runtime_error(tostr("explicit lambda return type T1 conflicts with lambda body T2",
xtag("T1", explicit_return_td),
xtag("T2", body->valuetype())));
}
// TODO: unify(explicit_return_td, body->valuetype())
auto function_info
= FunctionTdxInfo(body->valuetype(),
= FunctionTdxInfo(explicit_return_td ? explicit_return_td : body->valuetype(),
arg_td_v,
false /*!is_noexcept*/);
@ -72,9 +81,10 @@ namespace xo {
rp<Lambda>
Lambda::make_from_env(const std::string & name,
const rp<LocalEnv> & env,
TypeDescr explicit_return_td,
const rp<Expression> & body)
{
TypeDescr lambda_td = assemble_lambda_td(env->argv(), body);
TypeDescr lambda_td = assemble_lambda_td(env->argv(), explicit_return_td, body);
rp<Lambda> retval
= new Lambda(name,
@ -96,7 +106,9 @@ namespace xo {
{
rp<LocalEnv> env = LocalEnv::make(argv, parent_env);
return make_from_env(name, env, body);
TypeDescr explicit_return_td = nullptr;
return make_from_env(name, env, explicit_return_td, body);
} /*make*/
std::set<std::string>
@ -168,8 +180,9 @@ namespace xo {
void
Lambda::complete_assembly_from_body() {
if (body_) {
TypeDescr explicit_return_td = nullptr;
TypeDescr lambda_td
= assemble_lambda_td(this->local_env_->argv(), body_);
= assemble_lambda_td(this->local_env_->argv(), explicit_return_td, body_);
if (lambda_td)
this->type_str_ = assemble_type_str(lambda_td);
@ -328,32 +341,6 @@ namespace xo {
refrtag("name", name_),
refrtag("argv", local_env_->argv()),
refrtag("body", body_));
#ifdef OBSOLETE
ppstate * pps = ppii.pps();
if (ppii.upto()) {
if (!pps->print_upto("<Lambda"))
return false;
if (!pps->print_upto_tag("name", name_))
return false;
if (!pps->print_upto_tag("argv", local_env_->argv()))
return false;
if (!pps->print_upto_tag("body", body_))
return false;
pps->write(">");
return true;
} else {
pps->write("<Lambda");
pps->newline_pretty_tag(ppii.ci1(), "name", name_);
pps->newline_pretty_tag(ppii.ci1(), "argv", local_env_->argv());
pps->newline_pretty_tag(ppii.ci1(), "body", body_);
pps->write(">");
return false;
}
#endif
}
// ----- Lambda Access -----
@ -364,7 +351,8 @@ namespace xo {
const rp<Expression> & body,
const rp<Environment> & parent_env)
{
TypeDescr lambda_td = assemble_lambda_td(argv, body);
TypeDescr explicit_return_td = nullptr;
TypeDescr lambda_td = assemble_lambda_td(argv, explicit_return_td, body);
rp<LocalEnv> env = LocalEnv::make(argv, parent_env);
rp<LambdaAccess> retval

View file

@ -0,0 +1,112 @@
/** @file TypeBlueprint.cpp **/
#include "typeinf/TypeBlueprint.hpp"
namespace xo {
namespace scm {
/** Construct a blueprint around type reference @p x.
 *
 *  @param x  type reference; copied into member ref_.
 **/
TypeBlueprint::TypeBlueprint(const type_ref & x)
    : ref_(x)
{
}
/** Factory: heap-allocate a blueprint wrapping @p ref.
 *
 *  @param ref  type reference handed to the TypeBlueprint constructor.
 *  @return     reference-counted pointer to the new blueprint.
 **/
rp<TypeBlueprint>
TypeBlueprint::make(const type_ref & ref)
{
    rp<TypeBlueprint> blueprint = new TypeBlueprint(ref);

    return blueprint;
}
/** Factory: create a blueprint for a type variable.
 *
 *  The wrapped type_ref carries @p name and a null type description,
 *  so the new blueprint is not concrete until resolve_to() is called.
 *
 *  @param name  identifier for the type variable.
 *  @return      reference-counted pointer to the new blueprint.
 **/
rp<TypeBlueprint>
TypeBlueprint::typevar(const type_var & name)
{
    // null type description => unresolved
    const type_ref unresolved_ref(name, nullptr);

    return new TypeBlueprint(unresolved_ref);
}
/** Test whether two blueprints are equivalent on their face.
 *
 *  Rules:
 *  1. when both blueprints are concrete, they are equal iff they
 *     resolve to the same type description.
 *  2. otherwise they are equal iff they carry the same id.
 *     Type variable names are globally unique, so matching names
 *     must refer to the same type.  This rule will be relaxed once
 *     structural constraints are introduced.
 *
 *  @param lhs  first blueprint
 *  @param rhs  second blueprint
 *  @return     true iff @p lhs and @p rhs are equivalent per the rules above
 **/
bool
TypeBlueprint::equals(bp<TypeBlueprint> lhs, bp<TypeBlueprint> rhs)
{
    const bool both_concrete = (lhs->is_concrete() && rhs->is_concrete());

    if (both_concrete)
        return lhs->td() == rhs->td();

    // TODO: structural comparisons..

    // at least one side is unresolved: fall back to comparing ids
    return lhs->id() == rhs->id();
}
/** Report whether this blueprint is a (still unresolved) type variable.
 *
 *  Currently this is exactly the negation of ref_.is_concrete().
 *
 *  TODO: once blueprints carry structural information,
 *  e.g. vector[t'] or function(a' -> b'),
 *  such blueprints must report false here.
 *
 *  @return true iff the wrapped type reference is not concrete
 **/
bool
TypeBlueprint::is_variable() const
{
    const bool resolved = ref_.is_concrete();

    return !resolved;
}
/** Add the type variables mentioned by this blueprint to a set.
 *
 *  A concrete blueprint contributes nothing.  Otherwise the id of the
 *  wrapped type reference is inserted.
 *
 *  TODO: handle structural types
 *
 *  @param p_typevar_set  destination set; must be non-null when this
 *                        blueprint is not concrete.
 **/
void
TypeBlueprint::upsert_typevars(std::set<type_var> * p_typevar_set) const
{
    if (!this->is_concrete()) {
        p_typevar_set->insert(ref_.id());
    }
}
/** Follow substitutions in @p sub_map, starting from this blueprint.
 *
 *  Starting with this blueprint, repeatedly replace the current subject
 *  with the blueprint that @p sub_map associates with the subject's id.
 *  Stops when any of the following holds:
 *  - the subject is concrete
 *  - @p sub_map has no entry for the subject's id
 *  - the entry for the subject's id leads back to the same id
 *    (a self-referential entry)
 *
 *  The loop is expected to run at most once, since sub_map is collapsed
 *  whenever it is extended.
 *
 *  @param sub_map  substitutions keyed by type variable id
 *  @return         final blueprint reached; may be this blueprint itself
 **/
bp<TypeBlueprint>
TypeBlueprint::substitute(const type_substitution_map& sub_map)
{
    bp<TypeBlueprint> subject = this;

    while (!subject->is_concrete()) {
        auto ix = sub_map.find(subject->id());

        if (ix == sub_map.end())
            break;

        bp<TypeBlueprint> next = ix->second.get();

        // An entry that maps an id to a blueprint with that same id makes
        // no progress.  Without this guard an unresolved variable that is
        // its own canonical representative would loop here forever.
        if (next->id() == subject->id())
            break;

        subject = next;
    }

    // TODO: also want to update the whole chain,
    //       so that everything refers to final subject

    return subject;
}
/** Resolve this blueprint to type description @p td.
 *
 *  Delegates to resolve_to() on the wrapped type reference.
 *
 *  @param td  type description to resolve to
 **/
void
TypeBlueprint::resolve_to(TypeDescr td)
{
    this->ref_.resolve_to(td);
}
/** Write a human-readable description of this blueprint to @p os.
 *
 *  Output is "<TypeBlueprint", then an "id" tag, then a "td" tag carrying
 *  the canonical type name (only when td() is non-null), then ">".
 *
 *  @param os  destination stream
 **/
void
TypeBlueprint::display(std::ostream & os) const
{
    os << "<TypeBlueprint" << xtag("id", id());

    // td tag is omitted for unresolved blueprints
    const auto resolved_td = td();

    if (resolved_td) {
        os << xtag("td", resolved_td->canonical_name());
    }

    os << ">";
}
} /*namespace scm*/
} /*namespace xo*/
/** end TypeBlueprint.cpp **/

View file

@ -0,0 +1,48 @@
/** @file type_ref.cpp **/
#include "typeinf/type_ref.hpp"
namespace xo {
namespace scm {
/** Construct a type reference.
 *
 *  @param id  identifier for this reference; copied into id_.
 *  @param td  type description; null means the reference is
 *             not (yet) concrete, see is_concrete().
 **/
type_ref::type_ref(const type_var& id, TypeDescr td)
    : id_(id),
      td_(td)
{
}
/** @return true iff this reference has a (non-null) type description. **/
bool
type_ref::is_concrete() const
{
    return nullptr != td_;
}
/** Generate a fresh type variable name of the form "<prefix>:<counter>".
 *
 *  The counter is a function-local static that is advanced on every call
 *  and wraps modulo 100000000, so it prints with at most 8 decimal digits.
 *  Names will repeat after the counter wraps.
 *
 *  NOTE(review): the static counter is updated without synchronization;
 *  presumably callers are single-threaded -- confirm.
 *
 *  @param prefix  leading text for the generated name
 *  @return        generated name
 **/
auto
type_ref::generate_unique(xo::scm::prefix_type prefix) -> xo::scm::type_var
{
    static uint32_t s_counter = 0;

    s_counter = (s_counter + 1) % 100000000;

    char buf [type_var::fixed_capacity];

    // cast so the argument type matches %u regardless of how uint32_t is defined
    int n = snprintf(buf, sizeof(buf), "%s:%u",
                     prefix.c_str(),
                     static_cast<unsigned int>(s_counter));

    // snprintf returns a negative value on encoding error;
    // otherwise the length that would have been written absent truncation
    assert(n >= 0);
    assert(n < static_cast<int>(type_var::fixed_capacity));
    static_cast<void>(n); // only consumed by the asserts above

    // not necessary, but remove all doubt
    // max:
    //   7 chars for prefix (per original note -- TODO confirm against prefix_type)
    //   1 char  for ':' separator
    //   8 chars for u32 % 100000000
    //
    buf [type_var::fixed_capacity - 1] = '\0';

    return buf;
}
/** Attach type description @p td to this reference.
 *
 *  Precondition (checked by assert): the reference has no type
 *  description yet.
 *
 *  @param td  type description to store in td_
 **/
void
type_ref::resolve_to(TypeDescr td)
{
    assert(td_ == nullptr);

    td_ = td;
}
} /*namespace scm*/
} /*namespace xo*/
/** end type_ref.cpp **/

View file

@ -0,0 +1,218 @@
/** @file type_unifier.cpp
*
* author: Roland Conybeare, Jul 2025
**/
#include "typeinf/type_unifier.hpp"
#include "xo/indentlog/print/tag.hpp"
namespace xo {
namespace scm {
/** Print a unify_result.
 *
 *  Always writes the "success" and "unified" tags.  The
 *  "error_src_function" tag is written only when that pointer is non-null,
 *  and the "error_description" tag only when the description is non-empty.
 *
 *  @param os  destination stream
 *  @param x   result to print
 *  @return    @p os, to allow chaining
 **/
std::ostream &
operator<< (std::ostream & os,
            const unify_result & x)
{
    os << "<unify_result";
    os << xtag("success", x.success_);
    os << xtag("unified", x.unified_);

    const bool have_src_function = (x.error_src_function_ != nullptr);
    const bool have_description = !x.error_description_.empty();

    if (have_src_function) {
        os << xtag("error_src_function", x.error_src_function_);
    }

    if (have_description) {
        os << xtag("error_description", x.error_description_);
    }

    return os << ">";
}
/** Build the failure result reported when an occurs check fails.
 *
 *  An occurs-check failure means unification would imply an infinite type,
 *  e.g. unifying a' with (i64 -> a')
 *  would imply type (i64 -> i64 -> i64 -> ...)
 *
 *  @param src_function  name of the reporting function; stored in
 *                       error_src_function_ of the result.
 *  @param t1            first blueprint originally passed to unify
 *  @param t2            second blueprint originally passed to unify
 *  @param s1            blueprint that @p t1 stands for; the one found
 *                       occurring inside @p s2
 *  @param s2            blueprint that @p t2 stands for
 *  @return              result with success_ false, unified_ null,
 *                       and a description naming T1, T2, S1, S2
 **/
unify_result
type_unifier::occurs_error(const char * src_function,
                           bp<TypeBlueprint> t1,
                           bp<TypeBlueprint> t2,
                           bp<TypeBlueprint> s1,
                           bp<TypeBlueprint> s2)
{
    return {
        .success_ = false,
        .unified_ = nullptr,
        .error_src_function_ = src_function,
        .error_description_ = tostr("attempting unify(T1,T2) with T1 -> S1, T2 -> S2",
                                    ": occurs check failed with S1 occuring in S2",
                                    xrefrtag("T1", t1),
                                    xrefrtag("T2", t2),
                                    xrefrtag("S1", s1),
                                    xrefrtag("S2", s2))
    };
}
/** Attempt to unify blueprints @p lhs and @p rhs.
 *
 *  Steps:
 *  1. replace each argument with the blueprint reached through
 *     constraint_map_ (lhs1, rhs1).
 *  2. if lhs1, rhs1 already compare equal, succeed immediately.
 *  3. if both are type variables, record that the one with the
 *     lexicographically later id maps to the earlier one.
 *  4. if exactly one is a type variable and the other is concrete,
 *     resolve the variable (and the original argument that led to it)
 *     to the concrete type.
 *  5. if both are concrete (and, by step 2, different), fail.
 *
 *  On success the substitutions lhs1 -> canonical and rhs1 -> canonical
 *  are recorded in constraint_map_.  The canonical blueprint is never
 *  mapped to itself: such an entry would not make progress for an
 *  unresolved variable when followed by TypeBlueprint::substitute().
 *
 *  TODO: recursive unification for structural types, function types etc.
 *
 *  @param lhs  first blueprint
 *  @param rhs  second blueprint
 *  @return     on success: success_ true, unified_ the canonical blueprint.
 *              on failure: success_ false, unified_ null, with
 *              error_src_function_ and error_description_ filled in.
 **/
unify_result
type_unifier::unify(bp<TypeBlueprint> lhs, bp<TypeBlueprint> rhs)
{
    /** if we already have substitutions for either of {lhs, rhs}, use them **/
    auto lhs1 = lhs->substitute(constraint_map_);
    auto rhs1 = rhs->substitute(constraint_map_);

    /** reminder:
     *  1. lhs1, rhs1 need not be in constraint_map,
     *  2. lhs1, rhs1 need not be distinct from lhs, rhs respectively
     **/

    if (TypeBlueprint::equals(lhs1, rhs1)) {
        // blueprints are already equivalent on their face.
        // this recognizes matching concrete types.
        //
        // return the lexicographically earlier id as canonical representative
        bp<TypeBlueprint> canonical = (lhs1->id() < rhs1->id()) ? lhs1 : rhs1;

        return {
            .success_ = true,
            .unified_ = canonical.promote(),
            .error_src_function_ = nullptr,
            .error_description_ = ""
        };
    }

    assert(lhs1->id() != rhs1->id());

    constexpr const char * c_self_name = "type_unifier::unify";

    bp<TypeBlueprint> canonical;

    /** if both lhs1 and rhs1 are type variables,
     *  pick the lexicographically earlier one as canonical name.
     *  (already know they're distinct because did not satisfy equality test above)
     *
     *  prefer the canonical name as rhs target of all substitutions
     *  from known-to-be-equivalent typevars.
     **/
    if (lhs1->is_variable())
    {
        if (rhs1->is_variable())
        {
            // haven't resolved anything yet, but we do know
            // that type variables lhs,rhs,lhs1,rhs1 must refer to the same type
            if (lhs1->ref().id() < rhs1->ref().id()) {
                canonical = lhs1;
                constraint_map_[rhs1->id()] = lhs1.promote();
            } else {
                canonical = rhs1;
                constraint_map_[lhs1->id()] = rhs1.promote();
            }
        } else if (rhs1->is_concrete()) {
            canonical = (lhs1->id() < rhs1->id()) ? lhs1 : rhs1;

            // update lhs, lhs1 to refer to resolved rhs1.
            // rhs would already have been resolved
            assert(rhs->td() == rhs1->td());

            lhs1->resolve_to(rhs1->td());

            // lhs still needs resolving only when it is a different
            // blueprint from lhs1, i.e. its td was not just set above.
            // same test as the mirrored branch below.
            if (lhs->td() != lhs1->td())
                lhs->resolve_to(rhs1->td());
        } else {
            // 1. lhs1->is_variable()
            // 2. !rhs1->is_variable() && !rhs1->is_concrete()
            //
            // therefore need occurs check for lhs1 appearing in rhs1

            std::set<type_var> rhs1_typevar_set;
            rhs1->upsert_typevars(&rhs1_typevar_set);

            if (rhs1_typevar_set.contains(lhs1->id())) {
                return type_unifier::occurs_error(c_self_name,
                                                  lhs, rhs, lhs1, rhs1);
            }

            // TODO: some sort of recursive unification here
            assert(false);
        }
    } else if (rhs1->is_variable())
    {
        assert(!rhs1->is_concrete());

        if (lhs1->is_concrete())
        {
            canonical = (lhs1->id() < rhs1->id()) ? lhs1 : rhs1;

            // update rhs, rhs1 to refer to resolved lhs1.
            // lhs would already have been resolved
            assert(lhs->td() == lhs1->td());

            rhs1->resolve_to(lhs1->td());

            // rhs still needs resolving only when it is a different
            // blueprint from rhs1, i.e. its td was not just set above
            if (rhs->td() != rhs1->td())
                rhs->resolve_to(lhs1->td());
        } else
        {
            // 1. !lhs1->is_variable() && !lhs1->is_concrete()
            // 2. rhs1->is_variable()
            //
            // Need occurs check for rhs1 appearing in lhs1

            std::set<type_var> lhs1_typevar_set;
            lhs1->upsert_typevars(&lhs1_typevar_set);

            if (lhs1_typevar_set.contains(rhs1->id())) {
                return type_unifier::occurs_error(c_self_name,
                                                  rhs, lhs, rhs1, lhs1);
            }

            // TODO: some sort of recursive unification here
            assert(false);
        }
    } else if (lhs1->is_concrete() && rhs1->is_concrete())
    {
        /* we already know lhs1 != rhs1 -> unification failure */
        return {
            .success_ = false,
            .unified_ = nullptr,
            .error_src_function_ = c_self_name,
            .error_description_ = tostr("attempting unify(T1,T2) with T1 -> S1, T2 -> S2",
                                        ": incompatible concrete types S1,S2",
                                        xrefrtag("T1", lhs),
                                        xrefrtag("T2", rhs),
                                        xrefrtag("S1", lhs1),
                                        xrefrtag("S2", rhs1))
        };
    }

    // TODO: recursive unification for structural types, function types etc.

    if (canonical)
    {
        // point both representatives at the canonical blueprint, but never
        // store canonical -> canonical: for an unresolved variable that
        // entry would make a later substitute() revisit the same id forever
        if (lhs1->id() != canonical->id())
            constraint_map_[lhs1->id()] = canonical.promote();
        if (rhs1->id() != canonical->id())
            constraint_map_[rhs1->id()] = canonical.promote();

        return {
            .success_ = true,
            .unified_ = canonical.promote(),
            .error_src_function_ = nullptr,
            .error_description_ = ""
        };
    }

    // reached only via the not-yet-implemented structural branches above
    assert(false);

    return {
        .success_ = false,
        .unified_ = nullptr,
        .error_src_function_ = c_self_name,
        .error_description_ = tostr("attempting unify(T1,T2) with T1 -> S1, T2 -> S2",
                                    "supposedly-unreachable case for S1,S2",
                                    xrefrtag("T1", lhs),
                                    xrefrtag("T2", rhs),
                                    xrefrtag("S1", lhs1),
                                    xrefrtag("S2", rhs1))
    };
}
} /*namespace scm*/
} /*namespace xo*/
/** end type_unifier.cpp **/