use crate::call_frame::CallFrame; use crate::object::{ObjectHeader, ObjectKind}; use prometeu_bytecode::{HeapRef, Value}; /// Internal stored object: header plus opaque payload bytes. #[derive(Debug, Clone)] pub struct StoredObject { pub header: ObjectHeader, /// Raw payload bytes for byte-oriented kinds (e.g., String, Bytes). pub payload: Vec, /// Optional typed elements for `ObjectKind::Array`. /// When present, `header.payload_len` must equal `array_elems.len() as u32`. pub array_elems: Option>, /// Optional captured environment for `ObjectKind::Closure`. /// Invariants for closures: /// - `header.payload_len == 8` and `payload` bytes are `[fn_id: u32][env_len: u32]` (LE). /// - The actual `env_len` Value slots are stored here (not in `payload`) so /// they stay directly GC-visible. The GC must traverse exactly `env_len` /// entries from this slice, in order. pub closure_env: Option>, /// Optional coroutine data for `ObjectKind::Coroutine`. pub coroutine: Option, } /// Execution state of a coroutine. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum CoroutineState { Ready, Running, Sleeping, Finished, // Faulted, } /// Stored payload for coroutine objects. #[derive(Debug, Clone)] pub struct CoroutineData { pub pc: usize, pub state: CoroutineState, pub wake_tick: u64, pub stack: Vec, pub frames: Vec, } /// Simple vector-backed heap. No GC or compaction. #[derive(Debug, Default, Clone)] pub struct Heap { // Tombstone-aware store: Some(obj) = live allocation; None = freed slot. objects: Vec>, // Reclaimed slots available for deterministic reuse (LIFO). 
free_list: Vec, } impl Heap { pub fn new() -> Self { Self { objects: Vec::new(), free_list: Vec::new() } } fn insert_object(&mut self, obj: StoredObject) -> HeapRef { if let Some(idx) = self.free_list.pop() { debug_assert!(self.objects.get(idx).is_some_and(|slot| slot.is_none())); self.objects[idx] = Some(obj); HeapRef(idx as u32) } else { let idx = self.objects.len(); self.objects.push(Some(obj)); HeapRef(idx as u32) } } /// Allocate a new object with the given kind and raw payload bytes. /// Returns an opaque `HeapRef` handle. #[cfg(test)] pub fn allocate_object(&mut self, kind: ObjectKind, payload: &[u8]) -> HeapRef { let header = ObjectHeader::new(kind, payload.len() as u32); let obj = StoredObject { header, payload: payload.to_vec(), array_elems: None, closure_env: None, coroutine: None, }; self.insert_object(obj) } /// Allocate a new `Array` object with the given `Value` elements. /// `payload_len` stores the element count; raw `payload` bytes are empty. #[cfg(test)] pub fn allocate_array(&mut self, elements: Vec) -> HeapRef { let header = ObjectHeader::new(ObjectKind::Array, elements.len() as u32); let obj = StoredObject { header, payload: Vec::new(), array_elems: Some(elements), closure_env: None, coroutine: None, }; self.insert_object(obj) } /// Allocate a new `Closure` object with the given function id and captured environment. 
/// Layout:
    ///   payload bytes: [fn_id: u32][env_len: u32] (both little-endian)
    ///   env slots: stored out-of-line in `closure_env` for GC visibility
    ///
    /// # Panics
    /// Panics if `env_values` has more than `u32::MAX` entries. A truncated
    /// `env_len` would make the collector scan too few environment slots.
    pub fn alloc_closure(&mut self, fn_id: u32, env_values: &[Value]) -> HeapRef {
        let env_len = u32::try_from(env_values.len())
            .expect("closure environment length exceeds the u32 env_len field");

        // 8 bytes of metadata: fn_id followed by env_len.
        let mut payload = Vec::with_capacity(8);
        payload.extend_from_slice(&fn_id.to_le_bytes());
        payload.extend_from_slice(&env_len.to_le_bytes());

        let header = ObjectHeader::new(ObjectKind::Closure, payload.len() as u32);
        let obj = StoredObject {
            header,
            payload,
            array_elems: None,
            closure_env: Some(env_values.to_vec()),
            coroutine: None,
        };
        self.insert_object(obj)
    }

    /// Allocate a new `Coroutine` object with provided initial data.
    /// `payload_len` is 0; stack and frames are stored out-of-line for GC visibility.
    pub fn allocate_coroutine(
        &mut self,
        pc: usize,
        state: CoroutineState,
        wake_tick: u64,
        stack: Vec<Value>,
        frames: Vec<CallFrame>,
    ) -> HeapRef {
        let header = ObjectHeader::new(ObjectKind::Coroutine, 0);
        let obj = StoredObject {
            header,
            payload: Vec::new(),
            array_elems: None,
            closure_env: None,
            coroutine: Some(CoroutineData { pc, state, wake_tick, stack, frames }),
        };
        self.insert_object(obj)
    }

    /// Returns true if this handle refers to an allocated object, i.e. the
    /// index is in range and the slot is not a tombstone.
    pub fn is_valid(&self, r: HeapRef) -> bool {
        self.objects.get(r.0 as usize).is_some_and(|slot| slot.is_some())
    }

    /// Returns a shared reference to the coroutine data for the given handle, if it is a Coroutine.
    ///
    /// Test-only helper. Returns `None` for out-of-range handles, freed slots,
    /// and objects that carry no coroutine data.
    #[cfg(test)]
    pub fn coroutine_data(&self, r: HeapRef) -> Option<&CoroutineData> {
        self.objects
            .get(r.0 as usize)
            .and_then(|slot| slot.as_ref())
            .and_then(|obj| obj.coroutine.as_ref())
    }

    /// Returns a mutable reference to the coroutine data for the given handle, if it is a Coroutine.
pub fn coroutine_data_mut(&mut self, r: HeapRef) -> Option<&mut CoroutineData> {
        // `None` for out-of-range handles, freed slots, and objects without coroutine data.
        self.objects
            .get_mut(r.0 as usize)
            .and_then(|slot| slot.as_mut())
            .and_then(|obj| obj.coroutine.as_mut())
    }

    /// Get immutable access to an object's header by handle.
    /// Returns `None` if the handle is out of range or the slot has been freed.
    pub fn header(&self, r: HeapRef) -> Option<&ObjectHeader> {
        self.objects.get(r.0 as usize).and_then(|slot| slot.as_ref()).map(|o| &o.header)
    }

    /// Internal: get mutable access to an object's header by handle.
    /// Returns `None` if the handle is out of range or the slot has been freed.
    fn header_mut(&mut self, r: HeapRef) -> Option<&mut ObjectHeader> {
        self.objects.get_mut(r.0 as usize).and_then(|slot| slot.as_mut()).map(|o| &mut o.header)
    }

    // Internal: list inner `HeapRef` children of an object without allocating.
    // Note: GC mark no longer uses this helper; kept for potential diagnostics.
    // fn children_of(&self, r: HeapRef) -> Box<dyn Iterator<Item = HeapRef> + '_> {
    //     let idx = r.0 as usize;
    //     if let Some(Some(o)) = self.objects.get(idx) {
    //         match o.header.kind {
    //             ObjectKind::Array => {
    //                 let it = o
    //                     .array_elems
    //                     .as_deref()
    //                     .into_iter()
    //                     .flat_map(|slice| slice.iter())
    //                     .filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None });
    //                 return Box::new(it);
    //             }
    //             ObjectKind::Closure => {
    //                 // Read env_len from payload; traverse exactly that many entries.
    //                 debug_assert_eq!(o.header.kind, ObjectKind::Closure);
    //                 debug_assert_eq!(o.payload.len(), 8, "closure payload metadata must be 8 bytes");
    //                 let mut nbytes = [0u8; 4];
    //                 nbytes.copy_from_slice(&o.payload[4..8]);
    //                 let env_len = u32::from_le_bytes(nbytes) as usize;
    //                 let it = o
    //                     .closure_env
    //                     .as_deref()
    //                     .map(|slice| {
    //                         debug_assert_eq!(slice.len(), env_len, "closure env length must match encoded env_len");
    //                         &slice[..env_len]
    //                     })
    //                     .into_iter()
    //                     .flat_map(|slice| slice.iter())
    //                     .filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None });
    //                 return Box::new(it);
    //             }
    //             ObjectKind::Coroutine => {
    //                 if let Some(co) = o.coroutine.as_ref() {
    //                     let it = co
    //                         .stack
    //                         .iter()
    //                         .filter_map(|v| if let Value::HeapRef(h) = v { Some(*h) } else { None });
    //                     return Box::new(it);
    //                 }
    //                 return Box::new(std::iter::empty());
    //             }
    //             _ => return Box::new(std::iter::empty()),
    //         }
    //     }
    //     Box::new(std::iter::empty())
    // }

    /// Read the `fn_id` stored in a closure object. Returns None if kind mismatch or invalid ref.
    ///
    /// Also returns `None` when the payload is shorter than the 8 metadata
    /// bytes, so a malformed closure never causes an out-of-bounds slice.
    pub fn closure_fn_id(&self, r: HeapRef) -> Option<u32> {
        let obj = self.objects.get(r.0 as usize)?.as_ref()?;
        if obj.header.kind != ObjectKind::Closure || obj.payload.len() < 8 {
            return None;
        }
        debug_assert_eq!(obj.header.payload_len, 8);

        // fn_id occupies the first four payload bytes, little-endian.
        let mut bytes = [0u8; 4];
        bytes.copy_from_slice(&obj.payload[0..4]);
        Some(u32::from_le_bytes(bytes))
    }

    /// Get the captured environment slice of a closure. Returns None if kind mismatch or invalid ref.
#[cfg(test)] pub fn closure_env_slice(&self, r: HeapRef) -> Option<&[Value]> { let idx = r.0 as usize; let slot = self.objects.get(idx)?.as_ref()?; if slot.header.kind != ObjectKind::Closure { return None; } if slot.payload.len() >= 8 { let mut nbytes = [0u8; 4]; nbytes.copy_from_slice(&slot.payload[4..8]); let env_len = u32::from_le_bytes(nbytes) as usize; if let Some(env) = slot.closure_env.as_deref() { debug_assert_eq!(env.len(), env_len); } } slot.closure_env.as_deref() } /// Mark phase: starting from the given roots, traverse and set mark bits /// on all reachable objects. Uses an explicit stack to avoid recursion. pub fn mark_from_roots>(&mut self, roots: I) { let mut stack: Vec = roots.into_iter().collect(); while let Some(r) = stack.pop() { if !self.is_valid(r) { continue; } // If already marked, skip. let already_marked = self.header(r).map(|h: &ObjectHeader| h.is_marked()).unwrap_or(false); if already_marked { continue; } // Set mark bit. if let Some(h) = self.header_mut(r) { h.set_marked(true); } // Push children by scanning payload directly (no intermediate Vec allocs). 
let idx = r.0 as usize; if let Some(Some(obj)) = self.objects.get(idx) { match obj.header.kind { ObjectKind::Array => { if let Some(elems) = obj.array_elems.as_ref() { for val in elems.iter() { if let Value::HeapRef(child) = val && self.is_valid(*child) { let marked = self .header(*child) .map(|h: &ObjectHeader| h.is_marked()) .unwrap_or(false); if !marked { stack.push(*child); } } } } } ObjectKind::Closure => { debug_assert_eq!(obj.payload.len(), 8, "closure payload must be 8 bytes"); let mut nbytes = [0u8; 4]; nbytes.copy_from_slice(&obj.payload[4..8]); let env_len = u32::from_le_bytes(nbytes) as usize; if let Some(env) = obj.closure_env.as_ref() { debug_assert_eq!( env.len(), env_len, "closure env len must match encoded env_len" ); for val in env[..env_len].iter() { if let Value::HeapRef(child) = val && self.is_valid(*child) { let marked = self .header(*child) .map(|h: &ObjectHeader| h.is_marked()) .unwrap_or(false); if !marked { stack.push(*child); } } } } } ObjectKind::Coroutine => { if let Some(co) = obj.coroutine.as_ref() { for val in co.stack.iter() { if let Value::HeapRef(child) = val && self.is_valid(*child) { let marked = self .header(*child) .map(|h: &ObjectHeader| h.is_marked()) .unwrap_or(false); if !marked { stack.push(*child); } } } } } _ => {} } } } } /// Sweep phase: reclaim unmarked objects by turning their slots into /// tombstones (None), and clear the mark bit on the remaining live ones /// to prepare for the next GC cycle. Does not move or compact objects. pub fn sweep(&mut self) { for (idx, slot) in self.objects.iter_mut().enumerate() { if let Some(obj) = slot { if obj.header.is_marked() { // Live: clear mark for next cycle. obj.header.set_marked(false); } else { // Unreachable: reclaim by dropping and turning into tombstone. *slot = None; self.free_list.push(idx); } } } } /// Current number of allocated (live) objects. 
pub fn len(&self) -> usize {
        // Tombstones (`None`) are skipped; this is a linear scan of the slot table.
        self.objects.iter().filter(|s| s.is_some()).count()
    }

    /// Enumerate handles of coroutines that are currently suspended (i.e., not running):
    /// Ready or Sleeping. These must be treated as GC roots by the runtime so their
    /// stacks/frames are scanned during mark.
    ///
    /// Handles are returned in ascending slot order.
    pub fn suspended_coroutine_handles(&self) -> Vec<HeapRef> {
        let mut out = Vec::new();
        for (idx, slot) in self.objects.iter().enumerate() {
            if let Some(obj) = slot
                && obj.header.kind == ObjectKind::Coroutine
                && let Some(co) = &obj.coroutine
                && matches!(co.state, CoroutineState::Ready | CoroutineState::Sleeping)
            {
                out.push(HeapRef(idx as u32));
            }
        }
        out
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic_allocation_returns_valid_refs() {
        let mut heap = Heap::new();

        let r1 = heap.allocate_object(ObjectKind::String, b"hello");
        let r2 = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3, 4]);
        let r3 = heap.allocate_array(vec![]);

        assert!(heap.is_valid(r1));
        assert!(heap.is_valid(r2));
        assert!(heap.is_valid(r3));
        assert_eq!(heap.len(), 3);

        let h1 = heap.header(r1).unwrap();
        assert_eq!(h1.kind, ObjectKind::String);
        assert_eq!(h1.payload_len, 5);

        let h2 = heap.header(r2).unwrap();
        assert_eq!(h2.kind, ObjectKind::Bytes);
        assert_eq!(h2.payload_len, 4);

        let h3 = heap.header(r3).unwrap();
        assert_eq!(h3.kind, ObjectKind::Array);
        assert_eq!(h3.payload_len, 0);
    }

    #[test]
    fn allocate_and_transition_coroutine() {
        let mut heap = Heap::new();

        // Create a coroutine with a small stack containing a HeapRef to verify GC traversal later.
        let obj_ref = heap.allocate_object(ObjectKind::Bytes, &[4, 5, 6]);
        let coro = heap.allocate_coroutine(
            0,
            CoroutineState::Ready,
            0,
            vec![Value::Int32(1), Value::HeapRef(obj_ref)],
            vec![CallFrame { return_pc: 0, stack_base: 0, func_idx: 0 }],
        );

        let hdr = heap.header(coro).unwrap();
        assert_eq!(hdr.kind, ObjectKind::Coroutine);
        assert_eq!(hdr.payload_len, 0);

        // Manually mutate state transitions via access to inner data.
        {
            let slot = heap.objects.get_mut(coro.0 as usize).and_then(|s| s.as_mut()).unwrap();
            let co = slot.coroutine.as_mut().unwrap();
            assert_eq!(co.state, CoroutineState::Ready);
            co.state = CoroutineState::Running;
            assert_eq!(co.state, CoroutineState::Running);
            co.state = CoroutineState::Sleeping;
            co.wake_tick = 42;
            assert_eq!(co.wake_tick, 42);
            co.state = CoroutineState::Finished;
            assert_eq!(co.state, CoroutineState::Finished);
        }

        // GC should mark the object referenced from the coroutine stack when the coroutine is a root.
        heap.mark_from_roots([coro]);
        assert!(heap.header(obj_ref).unwrap().is_marked());
    }

    #[test]
    fn mark_reachable_through_array() {
        let mut heap = Heap::new();

        // Target object B (unreferenced yet)
        let b = heap.allocate_object(ObjectKind::Bytes, &[9, 9, 9]);

        // Array A that contains a reference to B among other primitives
        let a = heap.allocate_array(vec![Value::Int32(1), Value::HeapRef(b), Value::Boolean(false)]);

        // Mark starting from root A
        heap.mark_from_roots([a]);

        // Both A and B must be marked; random other objects are not allocated
        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn mark_does_not_mark_unreachable() {
        let mut heap = Heap::new();

        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);

        heap.mark_from_roots([root]);

        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());
    }

    #[test]
    fn mark_handles_cycles() {
        let mut heap = Heap::new();

        // Create two arrays that reference each other: A -> B, B -> A
        // Allocate empty arrays first to get handles
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);

        // Now mutate their internal vectors via re-allocation pattern:
        // replace with arrays containing cross-references. Since our simple
        // heap doesn't support in-place element edits via API, simulate by
        // directly editing stored objects.
        if let Some(slot) = heap.objects.get_mut(a.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(b)]);
                obj.header.payload_len = 1;
            }
        }
        if let Some(slot) = heap.objects.get_mut(b.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(a)]);
                obj.header.payload_len = 1;
            }
        }

        // Mark from A; should terminate and mark both.
        heap.mark_from_roots([a]);

        assert!(heap.header(a).unwrap().is_marked());
        assert!(heap.header(b).unwrap().is_marked());
    }

    #[test]
    fn closure_allocation_with_empty_env() {
        let mut heap = Heap::new();

        let c = heap.alloc_closure(42, &[]);
        assert!(heap.is_valid(c));

        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        // payload has only metadata (8 bytes)
        assert_eq!(h.payload_len, 8);

        assert_eq!(heap.closure_fn_id(c), Some(42));
        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env.len(), 0);
    }

    #[test]
    fn closure_allocation_with_env_and_access() {
        let mut heap = Heap::new();

        let a = heap.allocate_object(ObjectKind::String, b"a");
        let env_vals = vec![Value::Int32(7), Value::HeapRef(a), Value::Boolean(true)];
        let c = heap.alloc_closure(7, &env_vals);

        let h = heap.header(c).unwrap();
        assert_eq!(h.kind, ObjectKind::Closure);
        assert_eq!(h.payload_len, 8);
        assert_eq!(heap.closure_fn_id(c), Some(7));

        let env = heap.closure_env_slice(c).unwrap();
        assert_eq!(env, &env_vals[..]);

        // GC traversal should see the inner HeapRef in closure env when marking.
        heap.mark_from_roots([c]);
        assert!(heap.header(c).unwrap().is_marked());
        assert!(heap.header(a).unwrap().is_marked());
    }

    #[test]
    fn sweep_reclaims_unreachable_and_invalidates_handles() {
        let mut heap = Heap::new();

        // Allocate two objects; only one will be a root.
        let unreachable = heap.allocate_object(ObjectKind::String, b"orphan");
        let root = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);

        // Mark from root and then sweep.
        heap.mark_from_roots([root]);

        // Precondition: root marked, unreachable not marked.
        assert!(heap.header(root).unwrap().is_marked());
        assert!(!heap.header(unreachable).unwrap().is_marked());

        heap.sweep();

        // Unreachable must be reclaimed: handle becomes invalid.
        assert!(!heap.is_valid(unreachable));
        assert!(heap.header(unreachable).is_none());

        // Root must survive and have its mark bit cleared for next cycle.
        assert!(heap.is_valid(root));
        assert!(!heap.header(root).unwrap().is_marked());
    }

    #[test]
    fn sweep_keeps_indices_stable_and_len_counts_live() {
        let mut heap = Heap::new();

        let a = heap.allocate_object(ObjectKind::String, b"a");
        let b = heap.allocate_object(ObjectKind::String, b"b");
        let c = heap.allocate_object(ObjectKind::String, b"c");

        // Only keep A live.
        heap.mark_from_roots([a]);
        heap.sweep();

        // B and C are now invalidated, A remains valid.
        assert!(heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert!(!heap.is_valid(c));

        // Len counts only live objects.
        assert_eq!(heap.len(), 1);

        // Indices are stable: sweep tombstones slots in place instead of compacting,
        // so the backing store keeps all three slots and A still resolves to the
        // object it was allocated as, at its original index.
        assert_eq!(heap.objects.len(), 3);
        assert_eq!(a.0, 0);
        assert_eq!(heap.header(a).unwrap().kind, ObjectKind::String);
    }

    #[test]
    fn sweep_reuses_freed_slot_on_next_allocation() {
        let mut heap = Heap::new();

        let dead = heap.allocate_object(ObjectKind::String, b"dead");
        let live = heap.allocate_object(ObjectKind::String, b"live");

        heap.mark_from_roots([live]);
        heap.sweep();

        assert!(!heap.is_valid(dead));
        assert_eq!(heap.free_list, vec![dead.0 as usize]);

        let reused = heap.allocate_object(ObjectKind::Bytes, &[1, 2, 3]);
        assert_eq!(reused, dead);
        assert!(heap.is_valid(reused));
        assert!(heap.is_valid(live));
    }

    #[test]
    fn live_handles_remain_stable_when_freelist_is_reused() {
        let mut heap = Heap::new();

        let live = heap.allocate_object(ObjectKind::String, b"live");
        let dead = heap.allocate_object(ObjectKind::String, b"dead");

        heap.mark_from_roots([live]);
        heap.sweep();

        let replacement = heap.allocate_object(ObjectKind::Bytes, &[9]);

        assert_eq!(replacement, dead);
        assert_eq!(heap.header(live).unwrap().kind, ObjectKind::String);
        assert_eq!(heap.header(replacement).unwrap().kind, ObjectKind::Bytes);
        assert_eq!(live.0, 0);
    }

    #[test]
    fn freelist_reuse_is_deterministic_lifo() {
        let mut heap = Heap::new();

        let a = heap.allocate_object(ObjectKind::String, b"a");
        let b = heap.allocate_object(ObjectKind::String, b"b");
        let c = heap.allocate_object(ObjectKind::String, b"c");

        heap.mark_from_roots([]);
        heap.sweep();

        assert_eq!(heap.free_list, vec![a.0 as usize, b.0 as usize, c.0 as usize]);

        let r1 = heap.allocate_object(ObjectKind::Bytes, &[1]);
        let r2 = heap.allocate_object(ObjectKind::Bytes, &[2]);
        let r3 = heap.allocate_object(ObjectKind::Bytes, &[3]);

        assert_eq!(r1, c);
        assert_eq!(r2, b);
        assert_eq!(r3, a);
    }

    #[test]
    fn sweep_reclaims_unrooted_cycle() {
        let mut heap = Heap::new();

        // Build a 2-node cycle A <-> B using internal mutation (module-private access).
        let a = heap.allocate_array(vec![]);
        let b = heap.allocate_array(vec![]);

        // Make A point to B and B point to A.
        if let Some(slot) = heap.objects.get_mut(a.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(b)]);
                obj.header.payload_len = 1;
            }
        }
        if let Some(slot) = heap.objects.get_mut(b.0 as usize) {
            if let Some(obj) = slot.as_mut() {
                obj.array_elems = Some(vec![Value::HeapRef(a)]);
                obj.header.payload_len = 1;
            }
        }

        // No roots: perform sweep directly; both should be reclaimed.
        heap.sweep();

        assert!(!heap.is_valid(a));
        assert!(!heap.is_valid(b));
        assert_eq!(heap.len(), 0);
    }

    #[test]
    fn gc_scans_closure_env_and_keeps_captured_heap_object() {
        let mut heap = Heap::new();

        // Captured heap object.
        let obj = heap.allocate_object(ObjectKind::Bytes, &[4, 5, 6]);

        // Closure capturing the heap object among other primitive values.
        let env = [Value::Boolean(true), Value::HeapRef(obj), Value::Int32(123)];
        let clo = heap.alloc_closure(1, &env);

        // Mark from closure root: both closure and captured object must be marked.
        heap.mark_from_roots([clo]);
        assert!(heap.header(clo).unwrap().is_marked());
        assert!(heap.header(obj).unwrap().is_marked());

        // Sweep should keep both and clear their marks.
        heap.sweep();
        assert!(heap.is_valid(clo));
        assert!(heap.is_valid(obj));
        assert!(!heap.header(clo).unwrap().is_marked());
        assert!(!heap.header(obj).unwrap().is_marked());
    }

    #[test]
    fn gc_scans_nested_closures_and_keeps_inner_when_outer_is_rooted() {
        let mut heap = Heap::new();

        // Inner closure (no env).
        let inner = heap.alloc_closure(2, &[]);

        // Outer closure captures the inner closure as a Value::HeapRef.
        let outer = heap.alloc_closure(3, &[Value::HeapRef(inner)]);

        // Root only the outer closure.
        heap.mark_from_roots([outer]);

        // Both must be marked reachable.
        assert!(heap.header(outer).unwrap().is_marked());
        assert!(heap.header(inner).unwrap().is_marked());

        // After sweep, both survive and have marks cleared.
        heap.sweep();
        assert!(heap.is_valid(outer));
        assert!(heap.is_valid(inner));
        assert!(!heap.header(outer).unwrap().is_marked());
        assert!(!heap.header(inner).unwrap().is_marked());
    }

    #[test]
    fn gc_collects_unreferenced_closure_and_captures() {
        let mut heap = Heap::new();

        // Captured heap object and a closure capturing it.
        let captured = heap.allocate_object(ObjectKind::String, b"dead");
        let clo = heap.alloc_closure(9, &[Value::HeapRef(captured)]);

        // No roots are provided; sweeping should reclaim both.
        heap.sweep();

        assert!(!heap.is_valid(clo));
        assert!(!heap.is_valid(captured));
        assert_eq!(heap.len(), 0);
    }
}