xo-alloc: GC mutation log works for full GC

This commit is contained in:
Roland Conybeare 2025-08-12 00:16:00 -05:00
commit 258555e9eb
2 changed files with 268 additions and 26 deletions

View file

@ -40,6 +40,11 @@ namespace xo {
* Will allocate more space as needed
**/
std::size_t initial_tenured_z_ = 0;
/** trigger incremental GC after this many bytes allocated in nursery **/
std::size_t incr_gc_threshold_ = 64*1024;
/** trigger full GC after this many bytes promoted to tenured **/
std::size_t full_gc_threshold_ = 512*1024;
/** true to permit incremental garbage collection **/
bool allow_incremental_gc_ = true;
/** true to report statistics **/
@ -118,6 +123,7 @@ namespace xo {
**/
static up<GC> make(const Config & config);
const Config & config() const { return config_; }
const GCRunstate & runstate() const { return runstate_; }
const GcStatistics & native_gc_statistics() const { return gc_statistics_; }
GcStatisticsExt get_gc_statistics() const;
@ -126,6 +132,19 @@ namespace xo {
bool is_gc_enabled() const { return gc_enabled_ == 0; }
/** true during (and only during) a GC cycle **/
bool gc_in_progress() const { return runstate_.in_progress(); }
/** @return committed size of Nursery to-space **/
std::size_t nursery_to_committed() const;
/** @return nursery bytes used before checkpoint **/
std::size_t nursery_before_checkpoint() const;
/** @return nursery bytes used after checkpoint **/
std::size_t nursery_after_checkpoint() const;
/** @return committed size of Tenured to-space **/
std::size_t tenured_to_committed() const;
/** @return tenured bytes used before checkpoint **/
std::size_t tenured_before_checkpoint() const;
/** @return tenured bytes used after checkpoint = promoted since last GC **/
std::size_t tenured_after_checkpoint() const;
/** @return generation to which object at @p x belongs **/
generation_result tospace_generation_of(const void * x) const;
/** @return generation that contains @p x, given it's in from-space **/
@ -232,7 +251,6 @@ namespace xo {
* (T->N, aka xgen) and (N1->N0, aka xckp) pointers
**/
void incremental_gc_forward_mlog(ObjectStatistics * per_type_stats);
/**
* Aux function for @ref incremental_gc_forward_mlog. Calls this function until
* fixpoint.
@ -246,6 +264,23 @@ namespace xo {
MutationLog * to_mlog,
MutationLog * defer_mlog,
ObjectStatistics * per_type_stats);
/** Aux function for @ref execute_gc. Updates bookkeeping for cross-generational
* (T->N, aka xgen) and (N1->N0, aka xckp) pointers on full gc
**/
void full_gc_forward_mlog(ObjectStatistics * per_type_stats);
/**
* Aux function for @ref full_gc_forward_mlog, which calls this function repeatedly until fixpoint.
*
* @param from_mlog incoming mutation log. Contains {xgen,xckp} pointers before GC.
* Contents of this log are consumed (+discarded) before method returns.
* @param to_mlog outgoing mutation log. Will contain {xgen,xckp} pointers after GC.
* @param defer_mlog contains log entries associated with possible garbage.
*
**/
void full_gc_forward_mlog_phase(MutationLog * from_mlog,
MutationLog * to_mlog,
MutationLog * defer_mlog,
ObjectStatistics * per_type_stats);
private:
/** garbage collector configuration **/
@ -292,11 +327,6 @@ namespace xo {
/** optional per-object-type counters. snapshot at end of collection cycle **/
std::array<ObjectStatistics, gen2int(generation::N)> object_statistics_sae_;
/** trigger full GC whenever this much data arrives in tenured generation **/
std::size_t full_gc_threshold_ = 0;
/** trigger incr GC whenever this much data arrives in nuresery generation **/
std::size_t incr_gc_threshold_ = 0;
/** true when GC requested,
* remains true until GC.. completes? begins?
**/

View file

@ -163,13 +163,13 @@ namespace xo {
std::size_t
GC::before_checkpoint() const
{
return nursery_[role2int(role::to_space)]->before_checkpoint();
return this->nursery_to()->before_checkpoint();
}
std::size_t
GC::after_checkpoint() const
{
return nursery_[role2int(role::to_space)]->after_checkpoint();
return this->nursery_to()->after_checkpoint();
}
bool
@ -197,16 +197,34 @@ namespace xo {
return nursery_to()->committed();
}
generation_result
GC::fromspace_generation_of(const void * x) const
std::size_t
GC::nursery_before_checkpoint() const
{
if (tenured_[role2int(role::from_space)]->contains(x))
return generation_result::tenured;
return nursery_to()->before_checkpoint();
}
if (nursery_[role2int(role::from_space)]->contains(x))
return generation_result::nursery;
std::size_t
GC::nursery_after_checkpoint() const
{
return nursery_to()->after_checkpoint();
}
return generation_result::not_found;
std::size_t
GC::tenured_to_committed() const
{
return tenured_to()->committed();
}
std::size_t
GC::tenured_before_checkpoint() const
{
return tenured_to()->before_checkpoint();
}
std::size_t
GC::tenured_after_checkpoint() const
{
return tenured_to()->after_checkpoint();
}
generation_result
@ -221,6 +239,18 @@ namespace xo {
return generation_result::not_found;
}
/** Report which from-space region contains address @p x.
 *
 *  The tenured from-space is consulted first, then the nursery from-space.
 *
 *  @param x address to classify
 *  @return generation_result::tenured or generation_result::nursery when the
 *          matching from-space contains @p x; otherwise generation_result::not_found
 **/
generation_result
GC::fromspace_generation_of(const void * x) const
{
    const auto from_ix = role2int(role::from_space);

    generation_result gen = generation_result::not_found;

    if (tenured_[from_ix]->contains(x)) {
        gen = generation_result::tenured;
    } else if (nursery_[from_ix]->contains(x)) {
        gen = generation_result::nursery;
    }

    return gen;
}
std::byte *
GC::free_ptr(generation gen)
{
@ -262,16 +292,26 @@ namespace xo {
void
GC::checkpoint()
{
nursery_[role2int(role::to_space) ]->checkpoint();
nursery_to()->checkpoint();
/* checkpoint T generation so we can trigger GC based on new T objects rather than
* overall T size
*/
tenured_to()->checkpoint();
}
std::byte *
GC::alloc(std::size_t z)
{
std::byte * x = nursery_[role2int(role::to_space)]->alloc(z);
auto N_to = this->nursery_to();
if (!incr_gc_pending_ && (N_to->after_checkpoint() > config_.incr_gc_threshold_)) {
/* automatically ups to generation::tenured */
this->request_gc(generation::nursery);
}
std::byte * x = N_to->alloc(z);
/* ListAlloc won't fail unless we exhaust memory -- instead will increase heap size */
assert(x);
return x;
@ -291,17 +331,17 @@ namespace xo {
{
log && log("tenured");
retval = tenured_[role2int(role::to_space)]->alloc(z);
retval = this->tenured_to()->alloc(z);
}
break;
case generation_result::nursery:
{
if (nursery_[role2int(role::from_space)]->is_before_checkpoint(src))
if (this->nursery_from()->is_before_checkpoint(src))
{
/* nursery object has survived 2nd collection cycle
* -> promote into tenured generation
*/
retval = tenured_[role2int(role::to_space)]->alloc(z);
retval = this->tenured_to()->alloc(z);
log && log("promote", xtag("addr", (void*)retval));
@ -311,7 +351,7 @@ namespace xo {
} else {
log && log("nursery");
retval = nursery_[role2int(role::to_space)]->alloc(z);
retval = this->nursery_to()->alloc(z);
}
}
break;
@ -427,7 +467,7 @@ namespace xo {
/* gc on tenured generation may need this much space */
std::size_t need_tenured_z = (tenured_[role2int(role::to_space)]->allocated()
+ max_promote_z
+ full_gc_threshold_);
+ config_.full_gc_threshold_);
log && log("need_tenured_z", need_tenured_z);
@ -449,7 +489,7 @@ namespace xo {
/* subtracting max_promote_z is correct here, since anything not promoted is garbage */
std::size_t need_nursery_z = (nursery(role::to_space)->allocated()
- max_promote_z
+ incr_gc_threshold_);
+ config_.incr_gc_threshold_);
log && log(xtag("need_nursery_z", need_nursery_z));
@ -695,6 +735,139 @@ namespace xo {
}
}
/** One pass of mutation-log forwarding for a full GC.
 *
 *  Visits every entry in @p from_mlog once:
 *  - if the entry's parent is already forwarded, builds an updated entry
 *    (via update_parent_moved) and appends it to @p to_mlog when the entry
 *    is not dead, its child is non-null, and the child lives in the to-space
 *    nursery; otherwise the entry is dropped.
 *  - if the entry's parent is not forwarded, appends the entry unchanged
 *    to @p defer_mlog.
 *
 *  On return @p from_mlog is empty. @p defer_mlog is also emptied whenever
 *  n_rescue is zero (see review note at its declaration).
 *
 *  @param from_mlog   incoming mutation log; consumed (cleared) by this call
 *  @param to_mlog     outgoing mutation log; receives surviving entries
 *  @param defer_mlog  receives entries whose parent has not been forwarded
 *  (the trailing ObjectStatistics parameter is unnamed and not used here)
 **/
void
GC::full_gc_forward_mlog_phase(MutationLog * from_mlog,
MutationLog * to_mlog,
MutationLog * defer_mlog,
ObjectStatistics * /*per_type_stats*/)
{
scope log(XO_DEBUG(config_.debug_flag_), xtag("from_mlog.size", from_mlog->size()));
/* categorize pointers based on combination of {source address, destination address},
* only care about the generation associated with an address.
*
* N0 : nursery(from), before checkpoint
* N0': nursery(to), before checkpoint
* N1 : nursery(from), after checkpoint
* N1': nursery(to), after checkpoint
* T : tenured(from)
* T': tenured(to)
*
* loc(P): parent region before GC
* loc(C): child region before GC
*
* | | forwarded | loc now post | loc after |
* | | already? | root copy | action |
* | loc(P) loc(C) | P C | P' C' | P' C' | defer | action
* ----|---------------+--------------+---------------+---------------+-------+---------------
* (a) | T N0 | no no | T N0 | T N0 | P ->C | defer
* (b) | | yes | N1' | N1' | P ->C'| defer
* | | yes no | impossible
* (b2)| | yes | T' N1' | T' N1' | | +mlog
* (c) | T N1 | no no | T N1 | T T | P ->C | defer
* (d) | | yes | T T' | T T' | P ->C'| defer
* | | yes no | impossible
* (d2)| | yes | T' T' | T' T' | | -mlog
* (e) | N1 N0 | no no | N1 N0 | N1 N0 | P ->C | defer
* (f) | | yes | N1 N1' | N1 N1' | P ->C'| defer
* | | yes no | impossible
* (g) | | yes yes | T' N1' | T' N1' | | +mlog
*
* notes:
* (a) P,C maybe garbage. don't move either, but defer mlog in case P saved by a subsequent mutation;
* in that case C saved also, + will still have an xgen ptr, and still need an mlog entry.
* (b) C already evac'd, but P maybe garbage. defer mlog in case P rescued by a subsequent mutation;
* in that case will still have an xgen (T -> N) ptr, and still need an mlog entry.
* (b2) P,C already evac'd. Must update+remember xgen ptr {T -> N1}
* (c) P,C maybe garbage. don't move either, but defer mlog in case P saved by a subsequent mutation;
* in that case C promoted, no longer xgen
* (d) P maybe garbage. defer in case P saved by a subsequent mutation.
* C now tenured, so will no longer have an xgen pointer.
* (d2) P,C already evac'd. After collection no longer have xgen pointer, so no mlog.
* (e) P,C maybe garbage. don't move either, but defer mlog in case P saved by a subsequent mutation.
* in that case C saved also, + will still have an xgen ptr, so still need an mlog entry
* (f) P maybe garbage, C survives. defer mlog in case P saved+promoted by a subsequent mutation;
* in that case will still have an xgen (T -> N) ptr, so still need an mlog entry.
* (g) P,C already evac'd. Still have xgen pointer, must mlog
*/
// index of the entry being visited; used only to throttle progress logging
std::size_t i_from = 0;
// number of rescued subgraphs via mutation log entries
// NOTE(review): nothing in this function increments n_rescue, so the
// (n_rescue == 0) test at the end always holds and defer_mlog is always cleared.
std::size_t n_rescue = 0;
for (MutationLogEntry & from_entry : *from_mlog)
{
// progress message once per 10000 entries (when debug logging enabled)
log && (i_from % 10000 == 0) && log(xtag("i_from", i_from));
if (from_entry.is_parent_forwarded()) {
Object * parent_to = from_entry.parent_destination();
log && log(xtag("parent_to", (void*)parent_to));
assert(tospace_generation_of(parent_to) == generation_result::tenured);
MutationLogEntry to_entry = from_entry.update_parent_moved(parent_to);
// note: child obtained (as it must be) by reading from parent's memory _now_.
// Since parent has moved, child has too
Object * child_to = to_entry.child(); // after moving
// same condition as the assert above; the test remains in builds where assert is compiled out
if (tospace_generation_of(parent_to) == generation_result::tenured)
{
// cases (b2)(d2)(g), loc(P) is T'
// In all these cases parent has already been moved;
// therefore child has also been moved.
// Just need to decide whether to keep mlog entry
if (from_entry.is_dead()) {
// obsolete mutation -- no longer belongs to parent, discard
} else if (child_to) {
assert(!child_to->_is_forwarded());
if (tospace_generation_of(child_to) == generation_result::nursery) {
// case
// (b2) loc(P')=T', loc(C')=N1' --> +mlog
// (g) loc(P')=T', loc(C')=N1' --> +mlog
//
to_mlog->push_back(to_entry);
} else {
// case
// (d2) loc(P')=T', loc(C')=T' --> -mlog
}
}
} else {
// impossible - wouldn't have made mlog entry
assert(false);
}
} else {
// case
// (a) defer
// (b) defer
// (c) defer
// (d) defer
// (e) defer
// (f) defer
defer_mlog->push_back(from_entry);
}
++i_from;
}
// every incoming entry has been either forwarded, dropped or deferred
from_mlog->clear();
if (n_rescue == 0) {
// if we didn't rescue any objects
// then we now confirm that otherwise-unreachable parents in defer_mlog
// are garbage
defer_mlog->clear();
}
}
void
GC::incremental_gc_forward_mlog(ObjectStatistics * per_type_stats)
{
@ -703,7 +876,7 @@ namespace xo {
* - gc roots have been copied, along with everything reachable from them.
*
* plan:
* - forward mutation in *from_mutation_log, writing them to
* - forward mutations in *from_mutation_log, writing them to
* *to_mutationlog and/or *defer_mutation_log.
* Use defer when mutation P->C encountered, but P was not copied.
* P appears to be garbage, but may turn out to be live if encountered
@ -743,13 +916,52 @@ namespace xo {
}
}
/** Forward the mutation log as part of a full GC.
 *
 *  Expected state on entry (as stated by the original author):
 *  - a full gc is running
 *  - gc roots have been copied, along with everything reachable from them
 *
 *  Repeatedly runs @ref full_gc_forward_mlog_phase, moving entries from the
 *  from-space mutation log into the to-space mutation log and/or the
 *  deferred log, until a phase leaves the deferred log empty.
 *
 *  @param per_type_stats passed through unchanged to each phase
 **/
void
GC::full_gc_forward_mlog(ObjectStatistics * per_type_stats)
{
    MutationLog * const dest_mlog = this->mutation_log(role::to_space);

    bool revisit = true;

    while (revisit) {
        /* re-read both logs each round: they are exchanged at the bottom of the loop */
        MutationLog * src_mlog = this->mutation_log(role::from_space);
        MutationLog * deferred = defer_mutation_log_.get();

        this->full_gc_forward_mlog_phase(src_mlog,
                                         dest_mlog,
                                         deferred,
                                         per_type_stats);

        assert(src_mlog->empty());

        revisit = !deferred->empty();

        if (revisit) {
            /* deferred entries remain:
             *   1. at least one mlog triggered a rescue
             *   2. at least one mlog was deferred (had otherwise-unreachable parent)
             *
             * A deferred parent may have become reachable thanks to a rescue;
             * feed the deferred entries back in as the next round's input.
             */
            std::swap(mutation_log_[role2int(role::from_space)], defer_mutation_log_);
        }
    }
}
void
GC::forward_mutation_log(generation upto)
{
scope log(XO_DEBUG(config_.debug_flag_));
if (upto == generation::tenured) {
log && log("TODO: forward mutation log for full GC");
this->full_gc_forward_mlog(&object_statistics_sae_[gen2int(generation::tenured)]);
} else {
this->incremental_gc_forward_mlog(&object_statistics_sae_[gen2int(generation::nursery)]);
}
@ -874,7 +1086,7 @@ namespace xo {
target = generation::tenured;
if ((target == generation::nursery)
&& (tenured_[role2int(role::to_space)]->after_checkpoint() > full_gc_threshold_))
&& (this->tenured_to()->after_checkpoint() > config_.full_gc_threshold_))
{
/** full collection when more than @ref full_gc_threshold_ bytes added to tenured
* generation, since last full collection