//! Vector-backed VM heap with mark-and-sweep garbage collection.
//! Objects are never moved or compacted; freed slots become tombstones.
use crate::{ObjectHeader, ObjectKind};

use prometeu_bytecode::{HeapRef, Value};

/// Internal stored object: header plus opaque payload bytes.
///
/// Exactly one payload representation is populated per kind: raw `payload`
/// bytes for byte-oriented kinds, `array_elems` for arrays, and
/// `closure_env` (plus 8 metadata bytes in `payload`) for closures.
#[derive(Debug, Clone)]
pub struct StoredObject {
    /// Kind tag, payload length, and GC mark bit for this allocation.
    pub header: ObjectHeader,
    /// Raw payload bytes for byte-oriented kinds (e.g., String, Bytes).
    pub payload: Vec<u8>,
    /// Optional typed elements for `ObjectKind::Array`.
    /// When present, `header.payload_len` must equal `array_elems.len() as u32`.
    pub array_elems: Option<Vec<Value>>,
    /// Optional captured environment for `ObjectKind::Closure`.
    /// `header.payload_len` stores the fixed-size metadata length (8 bytes):
    /// [fn_id: u32][env_len: u32].
    /// The actual env slots are stored here to remain GC-visible.
    pub closure_env: Option<Vec<Value>>,
}

/// Simple vector-backed heap with non-moving mark-and-sweep collection
/// (see `mark_from_roots` / `sweep` below). Objects are never compacted,
/// so a `HeapRef` index stays stable for the heap's lifetime.
#[derive(Debug, Default, Clone)]
pub struct Heap {
    // Tombstone-aware store: Some(obj) = live allocation; None = freed slot.
    // Append-only: freed slots are not currently reused (no free list).
    objects: Vec<Option<StoredObject>>,
}

impl Heap {
|
|
pub fn new() -> Self { Self { objects: Vec::new() } }
|
|
|
|
/// Allocate a new object with the given kind and raw payload bytes.
|
|
/// Returns an opaque `HeapRef` handle.
|
|
pub fn allocate_object(&mut self, kind: ObjectKind, payload: &[u8]) -> HeapRef {
|
|
let header = ObjectHeader::new(kind, payload.len() as u32);
|
|
let obj = StoredObject { header, payload: payload.to_vec(), array_elems: None, closure_env: None };
|
|
let idx = self.objects.len();
|
|
// No free-list reuse in this PR: append and keep indices stable.
|
|
self.objects.push(Some(obj));
|
|
HeapRef(idx as u32)
|
|
}
|
|
|
|
/// Allocate a new `Array` object with the given `Value` elements.
|
|
/// `payload_len` stores the element count; raw `payload` bytes are empty.
|
|
pub fn allocate_array(&mut self, elements: Vec<Value>) -> HeapRef {
|
|
let header = ObjectHeader::new(ObjectKind::Array, elements.len() as u32);
|
|
let obj = StoredObject { header, payload: Vec::new(), array_elems: Some(elements), closure_env: None };
|
|
let idx = self.objects.len();
|
|
// No free-list reuse in this PR: append and keep indices stable.
|
|
self.objects.push(Some(obj));
|
|
HeapRef(idx as u32)
|
|
}
|
|
|
|
/// Allocate a new `Closure` object with the given function id and captured environment.
|
|
/// Layout:
|
|
/// payload bytes: [fn_id: u32][env_len: u32]
|
|
/// env slots: stored out-of-line in `closure_env` for GC visibility
|
|
pub fn alloc_closure(&mut self, fn_id: u32, env_values: &[Value]) -> HeapRef {
|
|
let mut payload = Vec::with_capacity(8);
|
|
payload.extend_from_slice(&fn_id.to_le_bytes());
|
|
let env_len = env_values.len() as u32;
|
|
payload.extend_from_slice(&env_len.to_le_bytes());
|
|
|
|
let header = ObjectHeader::new(ObjectKind::Closure, payload.len() as u32);
|
|
let obj = StoredObject {
|
|
header,
|
|
payload,
|
|
array_elems: None,
|
|
closure_env: Some(env_values.to_vec()),
|
|
};
|
|
let idx = self.objects.len();
|
|
self.objects.push(Some(obj));
|
|
HeapRef(idx as u32)
|
|
}
|
|
|
|
/// Returns true if this handle refers to an allocated object.
|
|
pub fn is_valid(&self, r: HeapRef) -> bool {
|
|
let idx = r.0 as usize;
|
|
if idx >= self.objects.len() { return false; }
|
|
self.objects[idx].is_some()
|
|
}
|
|
|
|
/// Get immutable access to an object's header by handle.
|
|
pub fn header(&self, r: HeapRef) -> Option<&ObjectHeader> {
|
|
self.objects
|
|
.get(r.0 as usize)
|
|
.and_then(|slot| slot.as_ref())
|
|
.map(|o| &o.header)
|
|
}
|
|
|
|
/// Internal: get mutable access to an object's header by handle.
|
|
fn header_mut(&mut self, r: HeapRef) -> Option<&mut ObjectHeader> {
|
|
self.objects
|
|
.get_mut(r.0 as usize)
|
|
.and_then(|slot| slot.as_mut())
|
|
.map(|o| &mut o.header)
|
|
}
|
|
|
|
/// Internal: enumerate inner `HeapRef` children of an object.
|
|
fn children_of(&self, r: HeapRef) -> impl Iterator<Item = HeapRef> + '_ {
|
|
let idx = r.0 as usize;
|
|
self.objects
|
|
.get(idx)
|
|
.and_then(|slot| slot.as_ref())
|
|
.map(|o| match o.header.kind {
|
|
ObjectKind::Array => {
|
|
// Traverse only Value::HeapRef inside the array.
|
|
o.array_elems
|
|
.as_ref()
|
|
.into_iter()
|
|
.flat_map(|v| v.iter())
|
|
.filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None })
|
|
.collect::<Vec<_>>()
|
|
.into_iter()
|
|
}
|
|
ObjectKind::Closure => {
|
|
// Traverse only Value::HeapRef inside the closure env.
|
|
o.closure_env
|
|
.as_ref()
|
|
.into_iter()
|
|
.flat_map(|v| v.iter())
|
|
.filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None })
|
|
.collect::<Vec<_>>()
|
|
.into_iter()
|
|
}
|
|
// These kinds have no inner references in this PR.
|
|
ObjectKind::String | ObjectKind::Bytes | ObjectKind::UserData | ObjectKind::Unknown => {
|
|
Vec::new().into_iter()
|
|
}
|
|
})
|
|
.into_iter()
|
|
.flatten()
|
|
}
|
|
|
|
/// Read the `fn_id` stored in a closure object. Returns None if kind mismatch or invalid ref.
|
|
pub fn closure_fn_id(&self, r: HeapRef) -> Option<u32> {
|
|
let idx = r.0 as usize;
|
|
let slot = self.objects.get(idx)?.as_ref()?;
|
|
if slot.header.kind != ObjectKind::Closure { return None; }
|
|
if slot.payload.len() < 8 { return None; }
|
|
let mut bytes = [0u8; 4];
|
|
bytes.copy_from_slice(&slot.payload[0..4]);
|
|
Some(u32::from_le_bytes(bytes))
|
|
}
|
|
|
|
/// Get the captured environment slice of a closure. Returns None if kind mismatch or invalid ref.
|
|
pub fn closure_env_slice(&self, r: HeapRef) -> Option<&[Value]> {
|
|
let idx = r.0 as usize;
|
|
let slot = self.objects.get(idx)?.as_ref()?;
|
|
if slot.header.kind != ObjectKind::Closure { return None; }
|
|
slot.closure_env.as_deref()
|
|
}
|
|
|
|
/// Mark phase: starting from the given roots, traverse and set mark bits
|
|
/// on all reachable objects. Uses an explicit stack to avoid recursion.
|
|
pub fn mark_from_roots<I: IntoIterator<Item = HeapRef>>(&mut self, roots: I) {
|
|
let mut stack: Vec<HeapRef> = roots.into_iter().collect();
|
|
|
|
while let Some(r) = stack.pop() {
|
|
if !self.is_valid(r) { continue; }
|
|
|
|
// If already marked, skip.
|
|
let already_marked = self.header(r).map(|h| h.is_marked()).unwrap_or(false);
|
|
if already_marked { continue; }
|
|
|
|
// Set mark bit.
|
|
if let Some(h) = self.header_mut(r) { h.set_marked(true); }
|
|
|
|
// Push children.
|
|
for child in self.children_of(r) {
|
|
if self.is_valid(child) {
|
|
// Check child's mark state cheaply to reduce stack churn.
|
|
let marked = self.header(child).map(|h| h.is_marked()).unwrap_or(false);
|
|
if !marked { stack.push(child); }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Sweep phase: reclaim unmarked objects by turning their slots into
|
|
/// tombstones (None), and clear the mark bit on the remaining live ones
|
|
/// to prepare for the next GC cycle. Does not move or compact objects.
|
|
pub fn sweep(&mut self) {
|
|
for slot in self.objects.iter_mut() {
|
|
if let Some(obj) = slot {
|
|
if obj.header.is_marked() {
|
|
// Live: clear mark for next cycle.
|
|
obj.header.set_marked(false);
|
|
} else {
|
|
// Unreachable: reclaim by dropping and turning into tombstone.
|
|
*slot = None;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Current number of allocated (live) objects.
|
|
pub fn len(&self) -> usize { self.objects.iter().filter(|s| s.is_some()).count() }
|
|
pub fn is_empty(&self) -> bool { self.len() == 0 }
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Test helper: overwrite an array object's elements in place (the public
    /// API has no element-mutation entry point yet), keeping the
    /// `payload_len == elems.len()` invariant documented on `StoredObject`.
    fn set_array_elems(heap: &mut Heap, target: HeapRef, elems: Vec<Value>) {
        if let Some(slot) = heap.objects.get_mut(target.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.header.payload_len = elems.len() as u32;
                obj.array_elems = Some(elems);
            }
        }
    }

    #[test]
    fn basic_allocation_returns_valid_refs() {
        let mut heap = Heap::new();

        let r1 = heap.allocate_object(ObjectKind::String, b"hello");
        let r2 = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3, 4]);
        let r3 = heap.allocate_array(vec![]);

        assert!(heap.is_valid(r1));
        assert!(heap.is_valid(r2));
        assert!(heap.is_valid(r3));
        assert_eq!(heap.len(), 3);

        let h1 = heap.header(r1).unwrap();
        assert_eq!(h1.kind, ObjectKind::String);
        assert_eq!(h1.payload_len, 5);

        let h2 = heap.header(r2).unwrap();
        assert_eq!(h2.kind, ObjectKind::Bytes);
        assert_eq!(h2.payload_len, 4);

        let h3 = heap.header(r3).unwrap();
        assert_eq!(h3.kind, ObjectKind::Array);
        assert_eq!(h3.payload_len, 0);
    }

    #[test]
    fn mark_reachable_through_array() {
        let mut heap = Heap::new();

        // Target object B (unreferenced yet)
        let b = heap.allocate_object(ObjectKind::Bytes, &[9, 9, 9]);
        // Array A that contains a reference to B among other primitives
        let a = heap.allocate_array(vec![
            Value::Int32(1),
            Value::HeapRef(b),
            Value::Boolean(false),
        ]);

        // Mark starting from root A
        heap.mark_from_roots([a]);

        // Both A and B must be marked; traversal follows the inner HeapRef.
        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn mark_does_not_mark_unreachable() {
        let mut heap = Heap::new();

        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);

        heap.mark_from_roots([root]);

        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());
    }

    #[test]
    fn mark_handles_cycles() {
        let mut heap = Heap::new();

        // Create two arrays that reference each other: A -> B, B -> A.
        // Allocate empty arrays first to obtain handles, then wire the
        // cycle via module-private access (no public element mutation yet).
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);
        set_array_elems(&mut heap, a, vec![Value::HeapRef(b)]);
        set_array_elems(&mut heap, b, vec![Value::HeapRef(a)]);

        // Mark from A; must terminate despite the cycle and mark both.
        heap.mark_from_roots([a]);

        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn closure_allocation_with_empty_env() {
        let mut heap = Heap::new();
        let c = heap.alloc_closure(42, &[]);
        assert!(heap.is_valid(c));
        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        // payload has only metadata (8 bytes)
        assert_eq!(h.payload_len, 8);
        assert_eq!(heap.closure_fn_id(c), Some(42));
        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env.len(), 0);
    }

    #[test]
    fn closure_allocation_with_env_and_access() {
        let mut heap = Heap::new();
        let a = heap.allocate_object(ObjectKind::String, b"a");
        let env_vals = vec![Value::Int32(7), Value::HeapRef(a), Value::Boolean(true)];
        let c = heap.alloc_closure(7, &env_vals);

        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        assert_eq!(h.payload_len, 8);
        assert_eq!(heap.closure_fn_id(c), Some(7));
        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env, &env_vals[..]);

        // GC traversal should see the inner HeapRef in closure env when marking.
        heap.mark_from_roots([c]);
        assert!(heap.header(c).unwrap().is_marked());
        assert!(heap.header(a).unwrap().is_marked());
    }

    #[test]
    fn sweep_reclaims_unreachable_and_invalidates_handles() {
        let mut heap = Heap::new();

        // Allocate two objects; only one will be a root.
        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);

        // Mark from root and then sweep.
        heap.mark_from_roots([root]);
        // Precondition: root marked, unreachable not marked.
        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());

        heap.sweep();

        // Unreachable must be reclaimed: handle becomes invalid.
        assert!(!heap.is_valid(unreachable));
        assert!(heap.header(unreachable).is_none());

        // Root must survive and have its mark bit cleared for next cycle.
        assert!(heap.is_valid(root));
        assert!(!heap.header(root).unwrap().is_marked());
    }

    #[test]
    fn sweep_keeps_indices_stable_and_len_counts_live() {
        let mut heap = Heap::new();

        let a = heap.allocate_object(ObjectKind::String, b"a");
        let b = heap.allocate_object(ObjectKind::String, b"b");
        let c = heap.allocate_object(ObjectKind::String, b"c");

        // Only keep A live.
        heap.mark_from_roots([a]);
        heap.sweep();

        // B and C are now invalidated, A remains valid.
        assert!(heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert!(!heap.is_valid(c));

        // Len counts only live objects.
        assert_eq!(heap.len(), 1);

        // Indices are stable: A's original handle still resolves to the
        // same object (same kind, same 1-byte payload) after the sweep.
        let h = heap.header(a).unwrap();
        assert_eq!(h.kind, ObjectKind::String);
        assert_eq!(h.payload_len, 1);
    }

    #[test]
    fn sweep_reclaims_unrooted_cycle() {
        let mut heap = Heap::new();

        // Build a 2-node cycle A <-> B using internal mutation (module-private access).
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);

        // Make A point to B and B point to A.
        set_array_elems(&mut heap, a, vec![Value::HeapRef(b)]);
        set_array_elems(&mut heap, b, vec![Value::HeapRef(a)]);

        // No roots: perform sweep directly; both should be reclaimed even
        // though they reference each other (reachability, not refcounting).
        heap.sweep();

        assert!(!heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert_eq!(heap.len(), 0);
    }

    #[test]
    fn gc_scans_closure_env_and_keeps_captured_heap_object() {
        let mut heap = Heap::new();

        // Captured heap object.
        let obj = heap.allocate_object(ObjectKind::Bytes, &[4, 5, 6]);

        // Closure capturing the heap object among other primitive values.
        let env = [Value::Boolean(true), Value::HeapRef(obj), Value::Int32(123)];
        let clo = heap.alloc_closure(1, &env);

        // Mark from closure root: both closure and captured object must be marked.
        heap.mark_from_roots([clo]);

        assert!(heap.header(clo).unwrap().is_marked());
        assert!(heap.header(obj).unwrap().is_marked());

        // Sweep should keep both and clear their marks.
        heap.sweep();
        assert!(heap.is_valid(clo));
        assert!(heap.is_valid(obj));
        assert!(!heap.header(clo).unwrap().is_marked());
        assert!(!heap.header(obj).unwrap().is_marked());
    }

    #[test]
    fn gc_scans_nested_closures_and_keeps_inner_when_outer_is_rooted() {
        let mut heap = Heap::new();

        // Inner closure (no env).
        let inner = heap.alloc_closure(2, &[]);

        // Outer closure captures the inner closure as a Value::HeapRef.
        let outer = heap.alloc_closure(3, &[Value::HeapRef(inner)]);

        // Root only the outer closure.
        heap.mark_from_roots([outer]);

        // Both must be marked reachable.
        assert!(heap.header(outer).unwrap().is_marked());
        assert!(heap.header(inner).unwrap().is_marked());

        // After sweep, both survive and have marks cleared.
        heap.sweep();
        assert!(heap.is_valid(outer));
        assert!(heap.is_valid(inner));
        assert!(!heap.header(outer).unwrap().is_marked());
        assert!(!heap.header(inner).unwrap().is_marked());
    }

    #[test]
    fn gc_collects_unreferenced_closure_and_captures() {
        let mut heap = Heap::new();

        // Captured heap object and a closure capturing it.
        let captured = heap.allocate_object(ObjectKind::String, b"dead");
        let clo = heap.alloc_closure(9, &[Value::HeapRef(captured)]);

        // No roots are provided; sweeping should reclaim both.
        heap.sweep();

        assert!(!heap.is_valid(clo));
        assert!(!heap.is_valid(captured));
        assert_eq!(heap.len(), 0);
    }
}