use crate::{ObjectHeader, ObjectKind};
use crate::call_frame::CallFrame;
use prometeu_bytecode::{HeapRef, Value};

/// Internal stored object: header plus opaque payload bytes.
#[derive(Debug, Clone)]
pub struct StoredObject {
    pub header: ObjectHeader,
    /// Raw payload bytes for byte-oriented kinds (e.g., String, Bytes).
    pub payload: Vec<u8>,
    /// Optional typed elements for `ObjectKind::Array`.
    /// When present, `header.payload_len` must equal `array_elems.len() as u32`.
    pub array_elems: Option<Vec<Value>>,
    /// Optional captured environment for `ObjectKind::Closure`.
    /// Invariants for closures:
    /// - `header.payload_len == 8` and `payload` bytes are `[fn_id: u32][env_len: u32]` (LE).
    /// - The actual `env_len` Value slots are stored here (not in `payload`) so
    ///   they stay directly GC-visible. The GC must traverse exactly `env_len`
    ///   entries from this slice, in order.
    pub closure_env: Option<Vec<Value>>,
    /// Optional coroutine data for `ObjectKind::Coroutine`.
    pub coroutine: Option<CoroutineData>,
}

/// Execution state of a coroutine.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum CoroutineState {
    Ready,
    Running,
    Sleeping,
    Finished,
    Faulted,
}

/// Stored payload for coroutine objects.
#[derive(Debug, Clone)]
pub struct CoroutineData {
    /// Current scheduler state.
    pub state: CoroutineState,
    /// Tick at which a `Sleeping` coroutine should be woken.
    pub wake_tick: u64,
    /// Saved value stack; scanned by the GC for `HeapRef`s.
    pub stack: Vec<Value>,
    /// Saved call frames (indices only; hold no heap references).
    pub frames: Vec<CallFrame>,
}

/// Simple vector-backed heap with a non-moving mark-sweep collector.
/// Freed slots become tombstones, so slot indices (and thus `HeapRef`
/// handles) stay stable across collections. No compaction.
#[derive(Debug, Default, Clone)]
pub struct Heap {
    // Tombstone-aware store: Some(obj) = live allocation; None = freed slot.
    objects: Vec<Option<StoredObject>>,
}

impl Heap {
    /// Create an empty heap.
    pub fn new() -> Self {
        Self { objects: Vec::new() }
    }

    /// Internal: append `obj` to the store and return a handle to its slot.
    fn push_object(&mut self, obj: StoredObject) -> HeapRef {
        let idx = self.objects.len();
        // No free-list reuse in this PR: append and keep indices stable.
        self.objects.push(Some(obj));
        HeapRef(idx as u32)
    }

    /// Allocate a new object with the given kind and raw payload bytes.
    /// Returns an opaque `HeapRef` handle.
    pub fn allocate_object(&mut self, kind: ObjectKind, payload: &[u8]) -> HeapRef {
        let header = ObjectHeader::new(kind, payload.len() as u32);
        self.push_object(StoredObject {
            header,
            payload: payload.to_vec(),
            array_elems: None,
            closure_env: None,
            coroutine: None,
        })
    }

    /// Allocate a new `Array` object with the given `Value` elements.
    /// `payload_len` stores the element count; raw `payload` bytes are empty.
    pub fn allocate_array(&mut self, elements: Vec<Value>) -> HeapRef {
        let header = ObjectHeader::new(ObjectKind::Array, elements.len() as u32);
        self.push_object(StoredObject {
            header,
            payload: Vec::new(),
            array_elems: Some(elements),
            closure_env: None,
            coroutine: None,
        })
    }

    /// Allocate a new `Closure` object with the given function id and captured environment.
    ///
    /// Layout:
    ///   payload bytes: [fn_id: u32][env_len: u32] (both little-endian)
    ///   env slots:     stored out-of-line in `closure_env` for GC visibility
    pub fn alloc_closure(&mut self, fn_id: u32, env_values: &[Value]) -> HeapRef {
        let mut payload = Vec::with_capacity(8);
        payload.extend_from_slice(&fn_id.to_le_bytes());
        let env_len = env_values.len() as u32;
        payload.extend_from_slice(&env_len.to_le_bytes());
        let header = ObjectHeader::new(ObjectKind::Closure, payload.len() as u32);
        self.push_object(StoredObject {
            header,
            payload,
            array_elems: None,
            closure_env: Some(env_values.to_vec()),
            coroutine: None,
        })
    }

    /// Allocate a new `Coroutine` object with provided initial data.
    /// `payload_len` is 0; stack and frames are stored out-of-line for GC visibility.
pub fn allocate_coroutine( &mut self, state: CoroutineState, wake_tick: u64, stack: Vec, frames: Vec, ) -> HeapRef { let header = ObjectHeader::new(ObjectKind::Coroutine, 0); let obj = StoredObject { header, payload: Vec::new(), array_elems: None, closure_env: None, coroutine: Some(CoroutineData { state, wake_tick, stack, frames }), }; let idx = self.objects.len(); self.objects.push(Some(obj)); HeapRef(idx as u32) } /// Returns true if this handle refers to an allocated object. pub fn is_valid(&self, r: HeapRef) -> bool { let idx = r.0 as usize; if idx >= self.objects.len() { return false; } self.objects[idx].is_some() } /// Get immutable access to an object's header by handle. pub fn header(&self, r: HeapRef) -> Option<&ObjectHeader> { self.objects .get(r.0 as usize) .and_then(|slot| slot.as_ref()) .map(|o| &o.header) } /// Internal: get mutable access to an object's header by handle. fn header_mut(&mut self, r: HeapRef) -> Option<&mut ObjectHeader> { self.objects .get_mut(r.0 as usize) .and_then(|slot| slot.as_mut()) .map(|o| &mut o.header) } /// Internal: enumerate inner `HeapRef` children of an object without allocating. /// Note: This helper is no longer used by GC mark; kept for potential diagnostics. fn children_of(&self, r: HeapRef) -> Box + '_> { let idx = r.0 as usize; if let Some(Some(o)) = self.objects.get(idx) { match o.header.kind { ObjectKind::Array => { let it = o .array_elems .as_deref() .into_iter() .flat_map(|slice| slice.iter()) .filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None }); return Box::new(it); } ObjectKind::Closure => { // Read env_len from payload; traverse exactly that many entries. 
debug_assert_eq!(o.header.kind, ObjectKind::Closure); debug_assert_eq!(o.payload.len(), 8, "closure payload metadata must be 8 bytes"); let mut nbytes = [0u8; 4]; nbytes.copy_from_slice(&o.payload[4..8]); let env_len = u32::from_le_bytes(nbytes) as usize; let it = o .closure_env .as_deref() .map(|slice| { debug_assert_eq!(slice.len(), env_len, "closure env length must match encoded env_len"); &slice[..env_len] }) .into_iter() .flat_map(|slice| slice.iter()) .filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None }); return Box::new(it); } ObjectKind::Coroutine => { if let Some(co) = o.coroutine.as_ref() { let it = co .stack .iter() .filter_map(|v| if let Value::HeapRef(h) = v { Some(*h) } else { None }); return Box::new(it); } return Box::new(std::iter::empty()); } _ => return Box::new(std::iter::empty()), } } Box::new(std::iter::empty()) } /// Read the `fn_id` stored in a closure object. Returns None if kind mismatch or invalid ref. pub fn closure_fn_id(&self, r: HeapRef) -> Option { let idx = r.0 as usize; let slot = self.objects.get(idx)?.as_ref()?; if slot.header.kind != ObjectKind::Closure { return None; } if slot.payload.len() < 8 { return None; } debug_assert_eq!(slot.header.payload_len, 8); let mut bytes = [0u8; 4]; bytes.copy_from_slice(&slot.payload[0..4]); Some(u32::from_le_bytes(bytes)) } /// Get the captured environment slice of a closure. Returns None if kind mismatch or invalid ref. 
pub fn closure_env_slice(&self, r: HeapRef) -> Option<&[Value]> { let idx = r.0 as usize; let slot = self.objects.get(idx)?.as_ref()?; if slot.header.kind != ObjectKind::Closure { return None; } if slot.payload.len() >= 8 { let mut nbytes = [0u8; 4]; nbytes.copy_from_slice(&slot.payload[4..8]); let env_len = u32::from_le_bytes(nbytes) as usize; if let Some(env) = slot.closure_env.as_deref() { debug_assert_eq!(env.len(), env_len); } } slot.closure_env.as_deref() } /// Mark phase: starting from the given roots, traverse and set mark bits /// on all reachable objects. Uses an explicit stack to avoid recursion. pub fn mark_from_roots>(&mut self, roots: I) { let mut stack: Vec = roots.into_iter().collect(); while let Some(r) = stack.pop() { if !self.is_valid(r) { continue; } // If already marked, skip. let already_marked = self.header(r).map(|h| h.is_marked()).unwrap_or(false); if already_marked { continue; } // Set mark bit. if let Some(h) = self.header_mut(r) { h.set_marked(true); } // Push children by scanning payload directly (no intermediate Vec allocs). 
let idx = r.0 as usize; if let Some(Some(obj)) = self.objects.get(idx) { match obj.header.kind { ObjectKind::Array => { if let Some(elems) = obj.array_elems.as_ref() { for val in elems.iter() { if let Value::HeapRef(child) = val { if self.is_valid(*child) { let marked = self.header(*child).map(|h| h.is_marked()).unwrap_or(false); if !marked { stack.push(*child); } } } } } } ObjectKind::Closure => { debug_assert_eq!(obj.payload.len(), 8, "closure payload must be 8 bytes"); let mut nbytes = [0u8; 4]; nbytes.copy_from_slice(&obj.payload[4..8]); let env_len = u32::from_le_bytes(nbytes) as usize; if let Some(env) = obj.closure_env.as_ref() { debug_assert_eq!(env.len(), env_len, "closure env len must match encoded env_len"); for val in env[..env_len].iter() { if let Value::HeapRef(child) = val { if self.is_valid(*child) { let marked = self.header(*child).map(|h| h.is_marked()).unwrap_or(false); if !marked { stack.push(*child); } } } } } } ObjectKind::Coroutine => { if let Some(co) = obj.coroutine.as_ref() { for val in co.stack.iter() { if let Value::HeapRef(child) = val { if self.is_valid(*child) { let marked = self.header(*child).map(|h| h.is_marked()).unwrap_or(false); if !marked { stack.push(*child); } } } } } } _ => {} } } } } /// Sweep phase: reclaim unmarked objects by turning their slots into /// tombstones (None), and clear the mark bit on the remaining live ones /// to prepare for the next GC cycle. Does not move or compact objects. pub fn sweep(&mut self) { for slot in self.objects.iter_mut() { if let Some(obj) = slot { if obj.header.is_marked() { // Live: clear mark for next cycle. obj.header.set_marked(false); } else { // Unreachable: reclaim by dropping and turning into tombstone. *slot = None; } } } } /// Current number of allocated (live) objects. 
pub fn len(&self) -> usize {
    self.objects.iter().filter(|s| s.is_some()).count()
}

/// Returns true when the heap holds no live objects.
pub fn is_empty(&self) -> bool {
    self.len() == 0
}
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_allocation_returns_valid_refs() {
        let mut heap = Heap::new();
        let r1 = heap.allocate_object(ObjectKind::String, b"hello");
        let r2 = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3, 4]);
        let r3 = heap.allocate_array(vec![]);
        assert!(heap.is_valid(r1));
        assert!(heap.is_valid(r2));
        assert!(heap.is_valid(r3));
        assert_eq!(heap.len(), 3);
        let h1 = heap.header(r1).unwrap();
        assert_eq!(h1.kind, ObjectKind::String);
        assert_eq!(h1.payload_len, 5);
        let h2 = heap.header(r2).unwrap();
        assert_eq!(h2.kind, ObjectKind::Bytes);
        assert_eq!(h2.payload_len, 4);
        let h3 = heap.header(r3).unwrap();
        assert_eq!(h3.kind, ObjectKind::Array);
        assert_eq!(h3.payload_len, 0);
    }

    #[test]
    fn allocate_and_transition_coroutine() {
        let mut heap = Heap::new();
        // Create a coroutine with a small stack containing a HeapRef to verify GC traversal later.
        let obj_ref = heap.allocate_object(ObjectKind::Bytes, &[4, 5, 6]);
        let coro = heap.allocate_coroutine(
            CoroutineState::Ready,
            0,
            vec![Value::Int32(1), Value::HeapRef(obj_ref)],
            vec![CallFrame { return_pc: 0, stack_base: 0, func_idx: 0 }],
        );
        let hdr = heap.header(coro).unwrap();
        assert_eq!(hdr.kind, ObjectKind::Coroutine);
        assert_eq!(hdr.payload_len, 0);
        // Manually mutate state transitions via access to inner data.
        {
            let slot = heap.objects.get_mut(coro.0 as usize).and_then(|s| s.as_mut()).unwrap();
            let co = slot.coroutine.as_mut().unwrap();
            assert_eq!(co.state, CoroutineState::Ready);
            co.state = CoroutineState::Running;
            assert_eq!(co.state, CoroutineState::Running);
            co.state = CoroutineState::Sleeping;
            co.wake_tick = 42;
            assert_eq!(co.wake_tick, 42);
            co.state = CoroutineState::Finished;
            assert_eq!(co.state, CoroutineState::Finished);
        }
        // GC should mark the object referenced from the coroutine stack when the coroutine is a root.
        heap.mark_from_roots([coro]);
        assert!(heap.header(obj_ref).unwrap().is_marked());
    }

    #[test]
    fn mark_reachable_through_array() {
        let mut heap = Heap::new();
        // Target object B (unreferenced yet)
        let b = heap.allocate_object(ObjectKind::Bytes, &[9, 9, 9]);
        // Array A that contains a reference to B among other primitives
        let a = heap.allocate_array(vec![
            Value::Int32(1),
            Value::HeapRef(b),
            Value::Boolean(false),
        ]);
        // Mark starting from root A
        heap.mark_from_roots([a]);
        // Both A and B must be marked; random other objects are not allocated
        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn mark_does_not_mark_unreachable() {
        let mut heap = Heap::new();
        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);
        heap.mark_from_roots([root]);
        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());
    }

    #[test]
    fn mark_handles_cycles() {
        let mut heap = Heap::new();
        // Create two arrays that reference each other: A -> B, B -> A
        // Allocate empty arrays first to get handles
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);
        // Now mutate their internal vectors via re-allocation pattern:
        // replace with arrays containing cross-references. Since our simple
        // heap doesn't support in-place element edits via API, simulate by
        // directly editing stored objects.
        if let Some(slot) = heap.objects.get_mut(a.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(b)]);
                obj.header.payload_len = 1;
            }
        }
        if let Some(slot) = heap.objects.get_mut(b.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(a)]);
                obj.header.payload_len = 1;
            }
        }
        // Mark from A; should terminate and mark both.
        heap.mark_from_roots([a]);
        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn closure_allocation_with_empty_env() {
        let mut heap = Heap::new();
        let c = heap.alloc_closure(42, &[]);
        assert!(heap.is_valid(c));
        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        // payload has only metadata (8 bytes)
        assert_eq!(h.payload_len, 8);
        assert_eq!(heap.closure_fn_id(c), Some(42));
        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env.len(), 0);
    }

    #[test]
    fn closure_allocation_with_env_and_access() {
        let mut heap = Heap::new();
        let a = heap.allocate_object(ObjectKind::String, b"a");
        let env_vals = vec![Value::Int32(7), Value::HeapRef(a), Value::Boolean(true)];
        let c = heap.alloc_closure(7, &env_vals);
        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        assert_eq!(h.payload_len, 8);
        assert_eq!(heap.closure_fn_id(c), Some(7));
        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env, &env_vals[..]);
        // GC traversal should see the inner HeapRef in closure env when marking.
        heap.mark_from_roots([c]);
        assert!(heap.header(c).unwrap().is_marked());
        assert!(heap.header(a).unwrap().is_marked());
    }

    #[test]
    fn sweep_reclaims_unreachable_and_invalidates_handles() {
        let mut heap = Heap::new();
        // Allocate two objects; only one will be a root.
        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);
        // Mark from root and then sweep.
        heap.mark_from_roots([root]);
        // Precondition: root marked, unreachable not marked.
        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());
        heap.sweep();
        // Unreachable must be reclaimed: handle becomes invalid.
        assert!(!heap.is_valid(unreachable));
        assert!(heap.header(unreachable).is_none());
        // Root must survive and have its mark bit cleared for next cycle.
        assert!(heap.is_valid(root));
        assert!(!heap.header(root).unwrap().is_marked());
    }

    #[test]
    fn sweep_keeps_indices_stable_and_len_counts_live() {
        let mut heap = Heap::new();
        let a = heap.allocate_object(ObjectKind::String, b"a");
        let b = heap.allocate_object(ObjectKind::String, b"b");
        let c = heap.allocate_object(ObjectKind::String, b"c");
        // Only keep A live.
        heap.mark_from_roots([a]);
        heap.sweep();
        // B and C are now invalidated, A remains valid.
        assert!(heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert!(!heap.is_valid(c));
        // Len counts only live objects.
        assert_eq!(heap.len(), 1);
        // Indices are stable: A was the first allocation, so its handle must
        // still address slot 0 after the sweep (tombstones do not shift slots).
        assert_eq!(a.0, 0);
    }

    #[test]
    fn sweep_reclaims_unrooted_cycle() {
        let mut heap = Heap::new();
        // Build a 2-node cycle A <-> B using internal mutation (module-private access).
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);
        // Make A point to B and B point to A.
        if let Some(slot) = heap.objects.get_mut(a.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(b)]);
                obj.header.payload_len = 1;
            }
        }
        if let Some(slot) = heap.objects.get_mut(b.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(a)]);
                obj.header.payload_len = 1;
            }
        }
        // No roots: perform sweep directly; both should be reclaimed.
        heap.sweep();
        assert!(!heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert_eq!(heap.len(), 0);
    }

    #[test]
    fn gc_scans_closure_env_and_keeps_captured_heap_object() {
        let mut heap = Heap::new();
        // Captured heap object.
        let obj = heap.allocate_object(ObjectKind::Bytes, &[4, 5, 6]);
        // Closure capturing the heap object among other primitive values.
        let env = [Value::Boolean(true), Value::HeapRef(obj), Value::Int32(123)];
        let clo = heap.alloc_closure(1, &env);
        // Mark from closure root: both closure and captured object must be marked.
        heap.mark_from_roots([clo]);
        assert!(heap.header(clo).unwrap().is_marked());
        assert!(heap.header(obj).unwrap().is_marked());
        // Sweep should keep both and clear their marks.
        heap.sweep();
        assert!(heap.is_valid(clo));
        assert!(heap.is_valid(obj));
        assert!(!heap.header(clo).unwrap().is_marked());
        assert!(!heap.header(obj).unwrap().is_marked());
    }

    #[test]
    fn gc_scans_nested_closures_and_keeps_inner_when_outer_is_rooted() {
        let mut heap = Heap::new();
        // Inner closure (no env).
        let inner = heap.alloc_closure(2, &[]);
        // Outer closure captures the inner closure as a Value::HeapRef.
        let outer = heap.alloc_closure(3, &[Value::HeapRef(inner)]);
        // Root only the outer closure.
        heap.mark_from_roots([outer]);
        // Both must be marked reachable.
        assert!(heap.header(outer).unwrap().is_marked());
        assert!(heap.header(inner).unwrap().is_marked());
        // After sweep, both survive and have marks cleared.
        heap.sweep();
        assert!(heap.is_valid(outer));
        assert!(heap.is_valid(inner));
        assert!(!heap.header(outer).unwrap().is_marked());
        assert!(!heap.header(inner).unwrap().is_marked());
    }

    #[test]
    fn gc_collects_unreferenced_closure_and_captures() {
        let mut heap = Heap::new();
        // Captured heap object and a closure capturing it.
        let captured = heap.allocate_object(ObjectKind::String, b"dead");
        let clo = heap.alloc_closure(9, &[Value::HeapRef(captured)]);
        // No roots are provided; sweeping should reclaim both.
        heap.sweep();
        assert!(!heap.is_valid(clo));
        assert!(!heap.is_valid(captured));
        assert_eq!(heap.len(), 0);
    }
}