diff --git a/crates/console/prometeu-bytecode/src/lib.rs b/crates/console/prometeu-bytecode/src/lib.rs index 1e815bd5..3a936e7a 100644 --- a/crates/console/prometeu-bytecode/src/lib.rs +++ b/crates/console/prometeu-bytecode/src/lib.rs @@ -18,6 +18,6 @@ pub use assembler::{assemble, AsmError}; pub use decoder::{decode_next, DecodeError}; pub use disassembler::disassemble; pub use layout::{compute_function_layouts, FunctionLayout}; -pub use model::{BytecodeLoader, FunctionMeta, LoadError}; +pub use model::{BytecodeLoader, FunctionMeta, LoadError, SyscallDecl}; pub use program_image::ProgramImage; pub use value::{HeapRef, Value}; diff --git a/crates/console/prometeu-bytecode/src/model.rs b/crates/console/prometeu-bytecode/src/model.rs index 2c162116..aabffc01 100644 --- a/crates/console/prometeu-bytecode/src/model.rs +++ b/crates/console/prometeu-bytecode/src/model.rs @@ -1,6 +1,7 @@ use crate::abi::SourceSpan; use crate::opcode::OpCode; use serde::{Deserialize, Serialize}; +use std::collections::HashSet; /// An entry in the Constant Pool. /// @@ -35,6 +36,9 @@ pub enum LoadError { InvalidFunctionIndex, MalformedHeader, MalformedSection, + MissingSyscallSection, + DuplicateSyscallIdentity, + InvalidUtf8, UnexpectedEof, } @@ -60,6 +64,22 @@ pub struct Export { pub func_idx: u32, } +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct SyscallDecl { + pub module: String, + pub name: String, + pub version: u16, + pub arg_slots: u16, + pub ret_slots: u16, +} + +const SECTION_KIND_CONST_POOL: u32 = 0; +const SECTION_KIND_FUNCTIONS: u32 = 1; +const SECTION_KIND_CODE: u32 = 2; +const SECTION_KIND_DEBUG: u32 = 3; +const SECTION_KIND_EXPORTS: u32 = 4; +const SECTION_KIND_SYSCALLS: u32 = 5; + /// Represents the final serialized format of a PBS v0 module. /// /// This structure is a pure data container for the PBS format. 
It does NOT @@ -74,6 +94,7 @@ pub struct BytecodeModule { pub code: Vec, pub debug_info: Option, pub exports: Vec, + pub syscalls: Vec, } impl BytecodeModule { @@ -84,23 +105,25 @@ impl BytecodeModule { let debug_data = self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default(); let export_data = self.serialize_exports(); + let syscall_data = self.serialize_syscalls(); let mut final_sections = Vec::new(); if !cp_data.is_empty() { - final_sections.push((0, cp_data)); + final_sections.push((SECTION_KIND_CONST_POOL, cp_data)); } if !func_data.is_empty() { - final_sections.push((1, func_data)); + final_sections.push((SECTION_KIND_FUNCTIONS, func_data)); } if !code_data.is_empty() { - final_sections.push((2, code_data)); + final_sections.push((SECTION_KIND_CODE, code_data)); } if !debug_data.is_empty() { - final_sections.push((3, debug_data)); + final_sections.push((SECTION_KIND_DEBUG, debug_data)); } if !export_data.is_empty() { - final_sections.push((4, export_data)); + final_sections.push((SECTION_KIND_EXPORTS, export_data)); } + final_sections.push((SECTION_KIND_SYSCALLS, syscall_data)); let mut out = Vec::new(); // Magic "PBS\0" @@ -219,6 +242,26 @@ impl BytecodeModule { } data } + + fn serialize_syscalls(&self) -> Vec { + let mut data = Vec::new(); + data.extend_from_slice(&(self.syscalls.len() as u32).to_le_bytes()); + for syscall in &self.syscalls { + let module = syscall.module.as_bytes(); + let name = syscall.name.as_bytes(); + assert!(u16::try_from(module.len()).is_ok(), "SYSC module name exceeds u16 length"); + assert!(u16::try_from(name.len()).is_ok(), "SYSC syscall name exceeds u16 length"); + + data.extend_from_slice(&(module.len() as u16).to_le_bytes()); + data.extend_from_slice(module); + data.extend_from_slice(&(name.len() as u16).to_le_bytes()); + data.extend_from_slice(name); + data.extend_from_slice(&syscall.version.to_le_bytes()); + data.extend_from_slice(&syscall.arg_slots.to_le_bytes()); + 
data.extend_from_slice(&syscall.ret_slots.to_le_bytes()); + } + data + } } pub struct BytecodeLoader; @@ -296,35 +339,45 @@ impl BytecodeLoader { code: Vec::new(), debug_info: None, exports: Vec::new(), + syscalls: Vec::new(), }; + let mut has_syscalls = false; for (kind, offset, length) in sections { let section_data = &bytes[offset as usize..(offset + length) as usize]; match kind { - 0 => { + SECTION_KIND_CONST_POOL => { // Const Pool module.const_pool = parse_const_pool(section_data)?; } - 1 => { + SECTION_KIND_FUNCTIONS => { // Functions module.functions = parse_functions(section_data)?; } - 2 => { + SECTION_KIND_CODE => { // Code module.code = section_data.to_vec(); } - 3 => { + SECTION_KIND_DEBUG => { // Debug Info module.debug_info = Some(parse_debug_section(section_data)?); } - 4 => { + SECTION_KIND_EXPORTS => { // Exports module.exports = parse_exports(section_data)?; } + SECTION_KIND_SYSCALLS => { + module.syscalls = parse_syscalls(section_data)?; + has_syscalls = true; + } _ => {} // Skip unknown or optional sections } } + if !has_syscalls { + return Err(LoadError::MissingSyscallSection); + } + // Additional validations validate_module(&module)?; @@ -520,7 +573,74 @@ fn parse_exports(data: &[u8]) -> Result, LoadError> { Ok(exports) } +fn parse_syscalls(data: &[u8]) -> Result, LoadError> { + if data.len() < 4 { + return Err(LoadError::MalformedSection); + } + + let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + let mut syscalls = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos + 2 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let module_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize; + pos += 2; + if pos + module_len > data.len() { + return Err(LoadError::UnexpectedEof); + } + let module = + std::str::from_utf8(&data[pos..pos + module_len]).map_err(|_| LoadError::InvalidUtf8)?; + pos += module_len; + + if pos + 2 > data.len() { + return 
Err(LoadError::UnexpectedEof); + } + let name_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize; + pos += 2; + if pos + name_len > data.len() { + return Err(LoadError::UnexpectedEof); + } + let name = + std::str::from_utf8(&data[pos..pos + name_len]).map_err(|_| LoadError::InvalidUtf8)?; + pos += name_len; + + if pos + 6 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let version = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()); + let arg_slots = u16::from_le_bytes(data[pos + 2..pos + 4].try_into().unwrap()); + let ret_slots = u16::from_le_bytes(data[pos + 4..pos + 6].try_into().unwrap()); + pos += 6; + + syscalls.push(SyscallDecl { + module: module.to_owned(), + name: name.to_owned(), + version, + arg_slots, + ret_slots, + }); + } + + if pos != data.len() { + return Err(LoadError::MalformedSection); + } + + Ok(syscalls) +} + fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> { + let mut syscall_identities = HashSet::with_capacity(module.syscalls.len()); + for syscall in &module.syscalls { + if !syscall_identities + .insert((syscall.module.clone(), syscall.name.clone(), syscall.version)) + { + return Err(LoadError::DuplicateSyscallIdentity); + } + } + for func in &module.functions { // Opcode stream bounds if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() { @@ -591,6 +711,36 @@ mod tests { h } + fn minimal_module() -> BytecodeModule { + BytecodeModule { + version: 0, + const_pool: vec![], + functions: vec![], + code: vec![], + debug_info: None, + exports: vec![], + syscalls: vec![], + } + } + + fn build_pbs_with_sections(sections: Vec<(u32, Vec)>) -> Vec { + let mut data = create_header(sections.len() as u32); + let mut offset = 32 + (sections.len() as u32 * 12); + + for (kind, section_data) in §ions { + data.extend_from_slice(&kind.to_le_bytes()); + data.extend_from_slice(&offset.to_le_bytes()); + data.extend_from_slice(&(section_data.len() as u32).to_le_bytes()); + 
offset += section_data.len() as u32; + } + + for (_, section_data) in sections { + data.extend_from_slice(§ion_data); + } + + data + } + #[test] fn test_invalid_magic() { let mut data = create_header(0); @@ -645,10 +795,10 @@ mod tests { #[test] fn test_invalid_function_code_offset() { - let mut data = create_header(2); - // Section 1: Functions, Kind 1, Offset 64, Length 20 (Header 4 + 1 entry 16) + let mut data = create_header(3); + // Section 1: Functions, Kind 1, Offset 80, Length 20 (Header 4 + 1 entry 16) data.extend_from_slice(&1u32.to_le_bytes()); - data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&80u32.to_le_bytes()); data.extend_from_slice(&20u32.to_le_bytes()); // Section 2: Code, Kind 2, Offset 128, Length 10 @@ -656,25 +806,31 @@ mod tests { data.extend_from_slice(&128u32.to_le_bytes()); data.extend_from_slice(&10u32.to_le_bytes()); + // Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty) + data.extend_from_slice(&5u32.to_le_bytes()); + data.extend_from_slice(&160u32.to_le_bytes()); + data.extend_from_slice(&4u32.to_le_bytes()); + data.resize(256, 0); // Setup functions section - let func_data_start = 64; + let func_data_start = 80; data[func_data_start..func_data_start + 4].copy_from_slice(&1u32.to_le_bytes()); // 1 function let entry_start = func_data_start + 4; data[entry_start..entry_start + 4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5 data[entry_start + 4..entry_start + 8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10 // 5 + 10 = 15 > 10 (code section length) + data[160..164].copy_from_slice(&0u32.to_le_bytes()); assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex)); } #[test] fn test_invalid_const_index() { - let mut data = create_header(2); - // Section 1: Const Pool, Kind 0, Offset 64, Length 4 (Empty CP) + let mut data = create_header(3); + // Section 1: Const Pool, Kind 0, Offset 80, Length 4 (Empty CP) data.extend_from_slice(&0u32.to_le_bytes()); - 
data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&80u32.to_le_bytes()); data.extend_from_slice(&4u32.to_le_bytes()); // Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0) @@ -682,25 +838,105 @@ mod tests { data.extend_from_slice(&128u32.to_le_bytes()); data.extend_from_slice(&6u32.to_le_bytes()); + // Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty) + data.extend_from_slice(&5u32.to_le_bytes()); + data.extend_from_slice(&160u32.to_le_bytes()); + data.extend_from_slice(&4u32.to_le_bytes()); + data.resize(256, 0); // Setup empty CP - data[64..68].copy_from_slice(&0u32.to_le_bytes()); + data[80..84].copy_from_slice(&0u32.to_le_bytes()); // Setup code with PushConst 0 data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes()); data[130..134].copy_from_slice(&0u32.to_le_bytes()); + data[160..164].copy_from_slice(&0u32.to_le_bytes()); assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex)); } #[test] - fn test_valid_minimal_load() { + fn test_missing_sysc_section_is_rejected() { let data = create_header(0); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MissingSyscallSection)); + } + + #[test] + fn test_valid_minimal_load_with_empty_sysc() { + let data = minimal_module().serialize(); let module = BytecodeLoader::load(&data).unwrap(); assert_eq!(module.version, 0); assert!(module.const_pool.is_empty()); assert!(module.functions.is_empty()); assert!(module.code.is_empty()); + assert!(module.syscalls.is_empty()); + } + + #[test] + fn test_valid_sysc_roundtrip() { + let mut module = minimal_module(); + module.syscalls = vec![SyscallDecl { + module: "gfx".into(), + name: "draw_line".into(), + version: 1, + arg_slots: 4, + ret_slots: 0, + }]; + + let data = module.serialize(); + let loaded = BytecodeLoader::load(&data).unwrap(); + + assert_eq!(loaded.syscalls, module.syscalls); + } + + #[test] + fn test_malformed_sysc_section_is_rejected() { + let data = 
build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, vec![1, 0, 0])]); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MalformedSection)); + } + + #[test] + fn test_invalid_utf8_in_sysc_section_is_rejected() { + let mut sysc = Vec::new(); + sysc.extend_from_slice(&1u32.to_le_bytes()); + sysc.extend_from_slice(&1u16.to_le_bytes()); + sysc.push(0xFF); + sysc.extend_from_slice(&1u16.to_le_bytes()); + sysc.push(b'x'); + sysc.extend_from_slice(&1u16.to_le_bytes()); + sysc.extend_from_slice(&0u16.to_le_bytes()); + sysc.extend_from_slice(&0u16.to_le_bytes()); + + let data = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, sysc)]); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidUtf8)); + } + + #[test] + fn test_duplicate_sysc_identity_is_rejected() { + let mut module = minimal_module(); + module.syscalls = vec![ + SyscallDecl { + module: "system".into(), + name: "has_cart".into(), + version: 1, + arg_slots: 0, + ret_slots: 1, + }, + SyscallDecl { + module: "system".into(), + name: "has_cart".into(), + version: 1, + arg_slots: 0, + ret_slots: 1, + }, + ]; + + let data = module.serialize(); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::DuplicateSyscallIdentity)); } } diff --git a/crates/console/prometeu-bytecode/src/program_image.rs b/crates/console/prometeu-bytecode/src/program_image.rs index 7dd95b3b..c3293dd8 100644 --- a/crates/console/prometeu-bytecode/src/program_image.rs +++ b/crates/console/prometeu-bytecode/src/program_image.rs @@ -118,6 +118,7 @@ impl From for BytecodeModule { code: program.rom.as_ref().to_vec(), debug_info: program.debug_info.clone(), exports, + syscalls: Vec::new(), } } } diff --git a/crates/tools/pbxgen-stress/src/lib.rs b/crates/tools/pbxgen-stress/src/lib.rs index 632a2d97..8501d834 100644 --- a/crates/tools/pbxgen-stress/src/lib.rs +++ b/crates/tools/pbxgen-stress/src/lib.rs @@ -38,6 +38,7 @@ pub fn generate() -> Result<()> { function_names: vec![(0, "main".into())], }), exports: vec![Export { 
symbol: "main".into(), func_idx: 0 }], + syscalls: vec![], }; let bytes = module.serialize(); diff --git a/docs/pull-requests/PR-2 - PBX SYSC and HOSTCALL Loader Patching.md b/docs/pull-requests/PR-2 - PBX SYSC and HOSTCALL Loader Patching.md index f3a17166..f84f5f1b 100644 --- a/docs/pull-requests/PR-2 - PBX SYSC and HOSTCALL Loader Patching.md +++ b/docs/pull-requests/PR-2 - PBX SYSC and HOSTCALL Loader Patching.md @@ -268,6 +268,124 @@ Required behavior: - patch before `Verifier::verify(...)` +## Implementation Plan + +Implementation should be staged so each phase leaves the workspace in a coherent state and keeps the verifier/VM contract intact. + +### Commit Checklist + +1. `prometeu-bytecode: add SyscallDecl model and mandatory SYSC section` +2. `prometeu-bytecode: add SYSC parser validation and load errors` +3. `prometeu-bytecode: add phase-1 coverage for empty/valid/invalid SYSC` +4. `prometeu-bytecode: wire downstream constructors to new BytecodeModule.syscalls field` +5. `prometeu-bytecode: clean up naming/docs after phase-1 passes` + +### Phase 1 - Extend PBX module format + +Target crates: + +- `crates/console/prometeu-bytecode` + +Steps: + +1. add `SyscallDecl` to `src/model.rs` and extend `BytecodeModule` with `syscalls: Vec<SyscallDecl>` +2. reserve a new section kind for `SYSC` in module serialization/deserialization +3. enforce the mandatory-section rule: valid PBS images always carry `SYSC`, including `count = 0` +4. reject malformed payloads, invalid UTF-8, and duplicate canonical identities during load +5. update `src/lib.rs` exports if needed so the VM and HAL can consume the new declarations + +Checkpoint: + +- `BytecodeLoader::load(...)` returns `BytecodeModule` with canonical syscall declarations preserved from PBX + +### Phase 2 - Add pre-load opcode support + +Target crates: + +- `crates/console/prometeu-bytecode` + +Steps: + +1. add `HOSTCALL` to `src/opcode.rs` with a `u32` immediate +2. 
extend `src/opcode_spec.rs` and `src/decoder.rs` so the loader can scan and decode `HOSTCALL` +3. update `src/assembler.rs` and `src/disassembler.rs` so tests and fixtures can produce/read pre-load artifacts +4. keep the runtime contract explicit: `HOSTCALL` is representable in bytecode artifacts but must not survive loader patching + +Checkpoint: + +- bytecode tooling round-trips `HOSTCALL <sysc_index>` correctly, while runtime execution still depends on patched `SYSCALL <syscall_id>` + +### Phase 3 - Bridge PBX declarations to host metadata + +Target crates: + +- `crates/console/prometeu-hal` + +Steps: + +1. add a resolver path that accepts program-owned syscall declarations instead of only `&'static str` identities +2. resolve each `(module, name, version)` against `src/syscalls.rs` +3. validate `arg_slots` and `ret_slots` against authoritative `SyscallMeta` +4. validate required capabilities against cartridge-derived `CapFlags` +5. return deterministic, load-facing errors for unknown syscalls, ABI mismatches, and missing capabilities + +Design note: + +- this phase likely needs a small owned-string adapter or a new helper alongside `resolve_program_syscalls(...)`, because PBX strings are runtime data, not `&'static str` + +Checkpoint: + +- given only `BytecodeModule.syscalls` plus granted `CapFlags`, the loader can produce a resolved table `sysc_index -> syscall_id` + +### Phase 4 - Patch before verification + +Target crates: + +- `crates/console/prometeu-vm` + +Steps: + +1. add a load-time patching helper near `src/virtual_machine.rs` or a small dedicated module +2. run that helper immediately after `BytecodeLoader::load(...)` and before `Verifier::verify(...)` +3. scan `module.code`, decode every instruction, and rewrite `HOSTCALL <sysc_index>` into `SYSCALL <syscall_id>` +4. reject out-of-bounds `HOSTCALL` indices during the scan +5. track `SYSC` usage and reject declarations that are never referenced +6. assert that no `HOSTCALL` remains before handing code to the verifier +7. 
only then call `Verifier::verify(...)`, compute `max_stack_slots`, and build `ProgramImage::from(module)` + +Why the sequencing matters: + +- the current load path in `crates/console/prometeu-vm/src/virtual_machine.rs` verifies the raw module immediately after `BytecodeLoader::load(...)` +- `ProgramImage` currently stores only the final ROM/functions/constants and does not preserve a syscall declaration table, so patching must happen while the code is still a `BytecodeModule` + +Checkpoint: + +- the verifier sees only numeric `SYSCALL <syscall_id>` instructions, preserving the existing verifier and VM execution model + +### Phase 5 - Tests and failure matrix + +Target crates: + +- `crates/console/prometeu-bytecode` +- `crates/console/prometeu-hal` +- `crates/console/prometeu-vm` + +Steps: + +1. add serialization/deserialization tests for missing, empty, valid, malformed, and duplicate `SYSC` +2. add opcode tests for `HOSTCALL` decoding and assembler/disassembler coverage +3. add resolver tests for unknown identity, ABI mismatch, and capability mismatch +4. add VM load-path tests proving patch-before-verify behavior +5. add an assertion that final executable images contain only numeric `SYSCALL` + +Suggested PR slicing: + +1. bytecode format + tests +2. `HOSTCALL` opcode plumbing +3. HAL resolution/ABI validation +4. VM loader patching +5. integration and regression tests + ## Deterministic Load Errors Load must fail for at least: diff --git a/test-cartridges/stress-console/program.pbx b/test-cartridges/stress-console/program.pbx index aec099eb..ed965b6a 100644 Binary files a/test-cartridges/stress-console/program.pbx and b/test-cartridges/stress-console/program.pbx differ