From 860f0db31ca32a25c9394f5adabd29f11c3efa58 Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Fri, 20 Feb 2026 06:45:53 +0000 Subject: [PATCH] pr6.1 --- crates/console/prometeu-vm/src/heap.rs | 96 +++++++- files/LOAD.md | 186 +++++++++++++++ files/TODOs.md | 303 +++++++++++++++++++++++++ files/VM RESET.md | 9 - 4 files changed, 582 insertions(+), 12 deletions(-) create mode 100644 files/LOAD.md diff --git a/crates/console/prometeu-vm/src/heap.rs b/crates/console/prometeu-vm/src/heap.rs index 01230252..fc3a01c3 100644 --- a/crates/console/prometeu-vm/src/heap.rs +++ b/crates/console/prometeu-vm/src/heap.rs @@ -10,6 +10,11 @@ pub struct StoredObject { /// Optional typed elements for `ObjectKind::Array`. /// When present, `header.payload_len` must equal `array_elems.len() as u32`. pub array_elems: Option>, + /// Optional captured environment for `ObjectKind::Closure`. + /// `header.payload_len` stores the fixed-size metadata length (8 bytes): + /// [fn_id: u32][env_len: u32]. + /// The actual env slots are stored here to remain GC-visible. + pub closure_env: Option>, } /// Simple vector-backed heap. No GC or compaction. @@ -26,7 +31,7 @@ impl Heap { /// Returns an opaque `HeapRef` handle. pub fn allocate_object(&mut self, kind: ObjectKind, payload: &[u8]) -> HeapRef { let header = ObjectHeader::new(kind, payload.len() as u32); - let obj = StoredObject { header, payload: payload.to_vec(), array_elems: None }; + let obj = StoredObject { header, payload: payload.to_vec(), array_elems: None, closure_env: None }; let idx = self.objects.len(); // No free-list reuse in this PR: append and keep indices stable. self.objects.push(Some(obj)); @@ -37,13 +42,35 @@ impl Heap { /// `payload_len` stores the element count; raw `payload` bytes are empty. 
pub fn allocate_array(&mut self, elements: Vec) -> HeapRef { let header = ObjectHeader::new(ObjectKind::Array, elements.len() as u32); - let obj = StoredObject { header, payload: Vec::new(), array_elems: Some(elements) }; + let obj = StoredObject { header, payload: Vec::new(), array_elems: Some(elements), closure_env: None }; let idx = self.objects.len(); // No free-list reuse in this PR: append and keep indices stable. self.objects.push(Some(obj)); HeapRef(idx as u32) } + /// Allocate a new `Closure` object with the given function id and captured environment. + /// Layout: + /// payload bytes: [fn_id: u32][env_len: u32] + /// env slots: stored out-of-line in `closure_env` for GC visibility + pub fn alloc_closure(&mut self, fn_id: u32, env_values: &[Value]) -> HeapRef { + let mut payload = Vec::with_capacity(8); + payload.extend_from_slice(&fn_id.to_le_bytes()); + let env_len = env_values.len() as u32; + payload.extend_from_slice(&env_len.to_le_bytes()); + + let header = ObjectHeader::new(ObjectKind::Closure, payload.len() as u32); + let obj = StoredObject { + header, + payload, + array_elems: None, + closure_env: Some(env_values.to_vec()), + }; + let idx = self.objects.len(); + self.objects.push(Some(obj)); + HeapRef(idx as u32) + } + /// Returns true if this handle refers to an allocated object. pub fn is_valid(&self, r: HeapRef) -> bool { let idx = r.0 as usize; @@ -84,8 +111,18 @@ impl Heap { .collect::>() .into_iter() } + ObjectKind::Closure => { + // Traverse only Value::HeapRef inside the closure env. + o.closure_env + .as_ref() + .into_iter() + .flat_map(|v| v.iter()) + .filter_map(|val| if let Value::HeapRef(h) = val { Some(*h) } else { None }) + .collect::>() + .into_iter() + } // These kinds have no inner references in this PR. 
- ObjectKind::String | ObjectKind::Bytes | ObjectKind::Closure | ObjectKind::UserData | ObjectKind::Unknown => { + ObjectKind::String | ObjectKind::Bytes | ObjectKind::UserData | ObjectKind::Unknown => { Vec::new().into_iter() } }) @@ -93,6 +130,25 @@ impl Heap { .flatten() } + /// Read the `fn_id` stored in a closure object. Returns None if kind mismatch or invalid ref. + pub fn closure_fn_id(&self, r: HeapRef) -> Option { + let idx = r.0 as usize; + let slot = self.objects.get(idx)?.as_ref()?; + if slot.header.kind != ObjectKind::Closure { return None; } + if slot.payload.len() < 8 { return None; } + let mut bytes = [0u8; 4]; + bytes.copy_from_slice(&slot.payload[0..4]); + Some(u32::from_le_bytes(bytes)) + } + + /// Get the captured environment slice of a closure. Returns None if kind mismatch or invalid ref. + pub fn closure_env_slice(&self, r: HeapRef) -> Option<&[Value]> { + let idx = r.0 as usize; + let slot = self.objects.get(idx)?.as_ref()?; + if slot.header.kind != ObjectKind::Closure { return None; } + slot.closure_env.as_deref() + } + /// Mark phase: starting from the given roots, traverse and set mark bits /// on all reachable objects. Uses an explicit stack to avoid recursion. 
pub fn mark_from_roots>(&mut self, roots: I) { @@ -238,6 +294,40 @@ mod tests { assert!(heap.header(b).unwrap().is_marked()); } + #[test] + fn closure_allocation_with_empty_env() { + let mut heap = Heap::new(); + let c = heap.alloc_closure(42, &[]); + assert!(heap.is_valid(c)); + let h = heap.header(c).unwrap(); + assert_eq!(h.kind, ObjectKind::Closure); + // payload has only metadata (8 bytes) + assert_eq!(h.payload_len, 8); + assert_eq!(heap.closure_fn_id(c), Some(42)); + let env = heap.closure_env_slice(c).unwrap(); + assert_eq!(env.len(), 0); + } + + #[test] + fn closure_allocation_with_env_and_access() { + let mut heap = Heap::new(); + let a = heap.allocate_object(ObjectKind::String, b"a"); + let env_vals = vec![Value::Int32(7), Value::HeapRef(a), Value::Boolean(true)]; + let c = heap.alloc_closure(7, &env_vals); + + let h = heap.header(c).unwrap(); + assert_eq!(h.kind, ObjectKind::Closure); + assert_eq!(h.payload_len, 8); + assert_eq!(heap.closure_fn_id(c), Some(7)); + let env = heap.closure_env_slice(c).unwrap(); + assert_eq!(env, &env_vals[..]); + + // GC traversal should see the inner HeapRef in closure env when marking. + heap.mark_from_roots([c]); + assert!(heap.header(c).unwrap().is_marked()); + assert!(heap.header(a).unwrap().is_marked()); + } + #[test] fn sweep_reclaims_unreachable_and_invalidates_handles() { let mut heap = Heap::new(); diff --git a/files/LOAD.md b/files/LOAD.md new file mode 100644 index 00000000..cf3e6842 --- /dev/null +++ b/files/LOAD.md @@ -0,0 +1,186 @@ +# PR-6 — PBX Declared Syscalls Section (Load-Time Resolution Integration) + +## Briefing + +Chapter 16 requires that cartridges declare required syscalls using canonical identities `(module, name, version)` and that these be resolved at load time before the VM begins execution. + +Until now, resolution has existed only as an API (PR5.3.2). The PBX on-disk format currently does not carry declared syscalls, and the loader cannot enforce load-time resolution deterministically. 
+ +This PR introduces a minimal, production-grade PBX section for **declared syscalls only** and integrates it into the PBX load sequence. + +**Scope note (important):** + +* PBX contains **program + syscall metadata** only. +* Assets (`asset_table`, `preload`, etc.) are explicitly **out of scope** for this PR and will be handled later via `asset.pa`. + +After this PR: + +* `program.pbx` contains a `SYSC` (declared syscalls) section. +* The PBX loader parses `SYSC` and resolves identities at load time. +* Load fails deterministically if resolution fails. +* The VM continues to execute `SYSCALL` with a numeric ID only. + +No backward compatibility. No fallback to external manifests in production. + +--- + +## Target + +1. Define a minimal PBX section for declared syscalls. +2. Extend the PBX parser/loader to read this section. +3. Integrate `resolve_program_syscalls()` into PBX load. +4. Enforce load-time failure on unknown or unauthorized syscalls. +5. Keep VM runtime strictly numeric. + +--- + +## PBX Section Format (Authoritative for This PR) + +Add a new PBX chunk: + +* Chunk ID: `SYSC` + +Binary layout: + +``` +u32 count +repeat count times: + u16 module_len + [module_len bytes UTF-8] + u16 name_len + [name_len bytes UTF-8] + u16 version +``` + +Rules: + +* UTF-8 strings only. +* No string table in this PR (keep it minimal). +* This section is REQUIRED for PBX v0 after this PR. +* If absent → load error. +* Duplicate `(module,name,version)` entries → load error. + +**Out of scope:** Any asset-related metadata. Do not add `ASSET_TABLE`, `PRELOAD`, or anything similar to PBX in this PR. + +--- + +## Load Sequence (After PR-6) + +1. Parse PBX header and TOC. +2. Parse `SYSC` section into a `Vec` of declared syscall identities `(module, name, version)`. +3. Call: + + ```rust + resolve_program_syscalls(&declared, vm.capabilities) + ``` +4. If resolution fails → abort load deterministically. +5. Store resolved syscalls inside VM (or a `LoadedProgram` struct). +6. Start VM execution. + +No runtime name resolution allowed. 
+ +--- + +## Work Items + +### 1) PBX Parser / Loader Extension + +* Add support for `SYSC` chunk in the PBX parser/loader. +* Return `declared_syscalls` (a `Vec` of declared syscall identities) as part of `ProgramImage` (or equivalent). +* If `SYSC` is missing → return load error. + +### 2) Load-Time Resolver Integration + +* In the PBX load path: + + * Parse declared syscalls. + * Resolve using `prometeu_hal::syscalls::resolve_program_syscalls`. + * Enforce capability checks at load time. + * If resolution fails → load fails (no runtime fallback). + +* Do NOT modify VM execution core beyond storing the resolved mapping. + +### 3) VM State Storage + +* Add or finalize storage for resolved syscalls (if not already present). +* VM must NOT use canonical strings at runtime. +* VM runtime continues to execute only numeric IDs. + +### 4) Error Handling + +PBX load must fail deterministically on any of the following: + +* Unknown `(module,name,version)`. +* Capability mismatch. +* Duplicate identities in `SYSC`. +* `SYSC` chunk is missing. +* Malformed `SYSC` payload (lengths/UTF-8). + +Errors must be explicit and deterministic. + +--- + +## Acceptance Checklist + +* [ ] PBX format supports `SYSC` chunk. +* [ ] Loader parses declared syscalls from PBX. +* [ ] Resolver runs during PBX load (before VM execution). +* [ ] Load fails on unknown syscall. +* [ ] Load fails on capability violation. +* [ ] Load fails when `SYSC` chunk is missing. +* [ ] No runtime name resolution exists. +* [ ] `cargo test` passes. + +--- + +## Tests + +Add tests covering: + +1. Valid PBX with one syscall identity → loads successfully. +2. PBX with unknown syscall identity → load error. +3. PBX with capability violation → load error. +4. PBX without `SYSC` section → load error. +5. PBX with duplicate identity entries → load error. +6. PBX with malformed `SYSC` payload (bad lengths/invalid UTF-8) → load error. + +Tests must construct minimal synthetic PBX images in-memory. + +Do NOT rely on external files. 
+ +--- + +## Junie Instructions + +You MAY: + +* Extend PBX parser/loader to support `SYSC`. +* Integrate the resolver into the PBX load path. +* Add deterministic tests for load-time resolution. + +You MUST NOT: + +* Add backward compatibility paths. +* Add fallback to JSON manifests. +* Add any asset-table or preload metadata to PBX. +* Change VM runtime dispatch logic. +* Modify syscall numeric IDs. + +If PBX container format details (header/TOC/chunk reading) are unclear: + +* STOP. +* Ask for clarification before inventing new unrelated chunk structures. + +No assumptions beyond the `SYSC` layout defined above. + +--- + +## Definition of Done + +After this PR: + +* Syscall resolution is fully load-time for PBX. +* PBX is authoritative for declared syscalls. +* VM executes only numeric syscalls. +* No legacy or dev fallback exists in the production load path. +* No asset responsibilities are added to PBX. diff --git a/files/TODOs.md b/files/TODOs.md index e69de29b..7f25ea78 100644 --- a/files/TODOs.md +++ b/files/TODOs.md @@ -0,0 +1,303 @@ +# PR-6.2 — Closure Capture Materialization + +## Briefing + +Closures must capture values from the current stack frame into a heap-allocated environment. + +This PR defines: + +* How captured values are materialized. +* How the environment layout is constructed. + +No CALL_CLOSURE yet. + +--- + +## Target + +Define bytecode semantics for closure creation: + +Introduce instruction (placeholder name): + +`MAKE_CLOSURE fn_id, capture_count` + +Semantics: + +* Pop `capture_count` values from stack (top-first). +* Allocate closure object with those values stored in-order. +* Push resulting `HeapRef` to stack. + +--- + +## Work Items + +1. Define new opcode `MAKE_CLOSURE`. +2. Implement stack semantics. +3. Ensure captured values are copied (not borrowed). +4. Update interpreter to support opcode. + +--- + +## Acceptance Checklist + +* [ ] MAKE_CLOSURE opcode exists. +* [ ] Stack pops correct number of values. 
+* Closure allocated correctly. +* Closure ref pushed to stack. + +--- + +## Tests + +1. Create closure capturing 0 values. +2. Create closure capturing 2 values. +3. Validate env order correctness. + +--- + +## Junie Instructions + +You MAY: + +* Add opcode. +* Modify interpreter dispatch. +* Add tests. + +You MUST NOT: + +* Implement CALL_CLOSURE yet. +* Modify GC behavior. +* Change verifier in this PR. + +If capture order semantics are unclear, STOP and ask. + +--- + +## Definition of Done + +Closures can be created with a captured environment and exist as heap values. + +--- + +# PR-6.3 — CALL_CLOSURE Instruction + +## Briefing + +Closures must be invokable at runtime. This PR introduces dynamic invocation semantics for closures. + +--- + +## Target + +Introduce opcode: + +`CALL_CLOSURE arg_count` + +Semantics: + +* Stack layout before call: + + ``` + [... args..., closure_ref] + ``` +* Pop closure_ref. +* Validate it is ObjectKind::Closure. +* Pop `arg_count` arguments. +* Create new call frame: + + * Locals initialized with captured env first (design choice; confirm per the Junie Instructions below). + * Arguments appended after captures. +* Jump to function entry (fn_id). + +--- + +## Work Items + +1. Add `CALL_CLOSURE` opcode. +2. Validate closure_ref type. +3. Integrate into call frame creation logic. +4. Respect function signature for ret_slots. + +--- + +## Acceptance Checklist + +* [ ] CALL_CLOSURE implemented. +* [ ] Correct stack consumption. +* [ ] Correct frame initialization. +* [ ] Error on non-closure value. + +--- + +## Tests + +1. Simple closure returning constant. +2. Closure capturing value and using it. +3. Error when calling non-closure. + +--- + +## Junie Instructions + +You MAY: + +* Add opcode and dispatch. +* Modify call frame initialization. +* Add tests. + +You MUST NOT: + +* Redesign stack model. +* Introduce coroutine behavior here. +* Change GC. + +If frame layout decision is ambiguous, STOP and ask before choosing ordering. 
+ +--- + +## Definition of Done + +Closures can be invoked dynamically and execute correctly. + +--- + +# PR-6.4 — GC Traversal for Closures + +## Briefing + +Closures introduce heap-to-heap references through their captured environments. + +The GC must traverse: + +closure -> env -> inner HeapRefs + +This PR updates the GC mark phase to correctly traverse closure environments. + +--- + +## Target + +Extend GC mark logic: + +* When encountering ObjectKind::Closure: + + * Iterate over env values. + * If a value contains HeapRef → mark referenced object. + +--- + +## Work Items + +1. Update mark traversal switch for Closure. +2. Ensure no panics on malformed env. +3. Add tests for nested closure references. + +--- + +## Acceptance Checklist + +* [ ] GC marks env HeapRefs. +* [ ] No regression in existing GC tests. +* [ ] Nested closures retained correctly. + +--- + +## Tests + +1. Closure capturing another closure. +2. Closure capturing heap object. +3. Unreferenced closure collected. + +--- + +## Junie Instructions + +You MAY: + +* Modify mark traversal. +* Add GC tests. + +You MUST NOT: + +* Implement compaction. +* Change sweep policy. + +If unsure whether env values may contain non-heap values, ask before assuming. + +--- + +## Definition of Done + +GC correctly traverses closure environments. + +--- + +# PR-6.5 — Verifier Support for Closures + +## Briefing + +The verifier must understand closure values as a distinct type and validate dynamic calls safely. + +Closures are heap objects but semantically represent callable values. + +--- + +## Target + +Extend verifier to: + +* Introduce a stack type: `ClosureValue`. +* Validate MAKE_CLOSURE stack effects. +* Validate CALL_CLOSURE argument counts. +* Validate ret_slots against function signature. + +--- + +## Work Items + +1. Add closure type to verifier type lattice. +2. Define stack transitions for MAKE_CLOSURE. +3. Define stack transitions for CALL_CLOSURE. +4. Ensure deterministic failure on misuse. 
+ +--- + +## Acceptance Checklist + +* [ ] Verifier understands closure values. +* [ ] Invalid CALL_CLOSURE rejected. +* [ ] ret_slots validated. +* [ ] All tests pass. + +--- + +## Tests + +1. Valid closure call passes verification. +2. CALL_CLOSURE with wrong arg count fails. +3. CALL_CLOSURE on non-closure fails. + +--- + +## Junie Instructions + +You MAY: + +* Extend verifier type model. +* Add tests. + +You MUST NOT: + +* Weaken verification rules. +* Introduce runtime-only checks instead of verifier checks. + +If closure typing conflicts with current stack model, STOP and ask. + +--- + +## Definition of Done + +Verifier fully supports closure creation and invocation. + +--- + diff --git a/files/VM RESET.md b/files/VM RESET.md index a8fa4440..3f5eeeb3 100644 --- a/files/VM RESET.md +++ b/files/VM RESET.md @@ -1,12 +1,3 @@ -6 — Closures (first-class user functions) - -6.1. Definir objeto Closure no heap: fn_id + env (captures) + metadata mínima. -6.2. Definir como capturas são materializadas (layout do env e como o bytecode cria closures). -6.3. Implementar instruções/semântica para criar closure e para CALL_CLOSURE. -6.4. Atualizar GC traversal: closure → env → heap refs internos. -6.5. Atualizar verifier: tipo “closure value”, validação de call sites, ret_slots. -6.6. Testes: closure simples, closure capturando, closure retornando outra closure. - 7 — Coroutines (único modelo de concorrência, cooperativo) 7.1. Definir objeto Coroutine no heap: stack/frames próprios, status, wake time, mailbox/queue se existir.