diff --git a/crates/prometeu-bytecode/src/lib.rs b/crates/prometeu-bytecode/src/lib.rs index 1ee812d4..c7c6966e 100644 --- a/crates/prometeu-bytecode/src/lib.rs +++ b/crates/prometeu-bytecode/src/lib.rs @@ -20,3 +20,4 @@ pub mod pbc; pub mod readwrite; pub mod asm; pub mod disasm; +pub mod v0; diff --git a/crates/prometeu-bytecode/src/v0/mod.rs b/crates/prometeu-bytecode/src/v0/mod.rs new file mode 100644 index 00000000..72c93439 --- /dev/null +++ b/crates/prometeu-bytecode/src/v0/mod.rs @@ -0,0 +1,388 @@ +use crate::pbc::ConstantPoolEntry; +use crate::opcode::OpCode; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum LoadError { + InvalidMagic, + InvalidVersion, + InvalidEndianness, + OverlappingSections, + SectionOutOfBounds, + InvalidOpcode, + InvalidConstIndex, + InvalidFunctionIndex, + MalformedHeader, + MalformedSection, + UnexpectedEof, +} + +#[derive(Debug, Clone, Default, PartialEq)] +pub struct FunctionMeta { + pub code_offset: u32, + pub code_len: u32, + pub param_slots: u16, + pub local_slots: u16, + pub return_slots: u16, + pub max_stack_slots: u16, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BytecodeModule { + pub version: u16, + pub const_pool: Vec, + pub functions: Vec, + pub code: Vec, +} + +pub struct BytecodeLoader; + +impl BytecodeLoader { + pub fn load(bytes: &[u8]) -> Result { + if bytes.len() < 32 { + return Err(LoadError::UnexpectedEof); + } + + // Magic "PBS\0" + if &bytes[0..4] != b"PBS\0" { + return Err(LoadError::InvalidMagic); + } + + let version = u16::from_le_bytes([bytes[4], bytes[5]]); + if version != 0 { + return Err(LoadError::InvalidVersion); + } + + let endianness = bytes[6]; + if endianness != 0 { // 0 = Little Endian + return Err(LoadError::InvalidEndianness); + } + + let section_count = u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]); + + let mut sections = Vec::new(); + let mut pos = 32; + for _ in 0..section_count { + if pos + 12 > bytes.len() { + return Err(LoadError::UnexpectedEof); + } + let kind 
= u32::from_le_bytes([bytes[pos], bytes[pos+1], bytes[pos+2], bytes[pos+3]]); + let offset = u32::from_le_bytes([bytes[pos+4], bytes[pos+5], bytes[pos+6], bytes[pos+7]]); + let length = u32::from_le_bytes([bytes[pos+8], bytes[pos+9], bytes[pos+10], bytes[pos+11]]); + + // Basic bounds check + if (offset as usize) + (length as usize) > bytes.len() { + return Err(LoadError::SectionOutOfBounds); + } + + sections.push((kind, offset, length)); + pos += 12; + } + + // Check for overlapping sections + for i in 0..sections.len() { + for j in i + 1..sections.len() { + let (_, o1, l1) = sections[i]; + let (_, o2, l2) = sections[j]; + + if (o1 < o2 + l2) && (o2 < o1 + l1) { + return Err(LoadError::OverlappingSections); + } + } + } + + let mut module = BytecodeModule { + version, + const_pool: Vec::new(), + functions: Vec::new(), + code: Vec::new(), + }; + + for (kind, offset, length) in sections { + let section_data = &bytes[offset as usize..(offset + length) as usize]; + match kind { + 0 => { // Const Pool + module.const_pool = parse_const_pool(section_data)?; + } + 1 => { // Functions + module.functions = parse_functions(section_data)?; + } + 2 => { // Code + module.code = section_data.to_vec(); + } + _ => {} // Skip unknown or optional sections like Debug, Exports, Imports for now + } + } + + // Additional validations + validate_module(&module)?; + + Ok(module) + } +} + +fn parse_const_pool(data: &[u8]) -> Result, LoadError> { + if data.is_empty() { + return Ok(Vec::new()); + } + if data.len() < 4 { + return Err(LoadError::MalformedSection); + } + let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + let mut cp = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos >= data.len() { + return Err(LoadError::UnexpectedEof); + } + let tag = data[pos]; + pos += 1; + match tag { + 0 => cp.push(ConstantPoolEntry::Null), + 1 => { // Int64 + if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } + let val = 
i64::from_le_bytes(data[pos..pos+8].try_into().unwrap()); + cp.push(ConstantPoolEntry::Int64(val)); + pos += 8; + } + 2 => { // Float64 + if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } + let val = f64::from_le_bytes(data[pos..pos+8].try_into().unwrap()); + cp.push(ConstantPoolEntry::Float64(val)); + pos += 8; + } + 3 => { // Boolean + if pos >= data.len() { return Err(LoadError::UnexpectedEof); } + cp.push(ConstantPoolEntry::Boolean(data[pos] != 0)); + pos += 1; + } + 4 => { // String + if pos + 4 > data.len() { return Err(LoadError::UnexpectedEof); } + let len = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; + pos += 4; + if pos + len > data.len() { return Err(LoadError::UnexpectedEof); } + let s = String::from_utf8_lossy(&data[pos..pos+len]).into_owned(); + cp.push(ConstantPoolEntry::String(s)); + pos += len; + } + 5 => { // Int32 + if pos + 4 > data.len() { return Err(LoadError::UnexpectedEof); } + let val = i32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + cp.push(ConstantPoolEntry::Int32(val)); + pos += 4; + } + _ => return Err(LoadError::MalformedSection), + } + } + Ok(cp) +} + +fn parse_functions(data: &[u8]) -> Result, LoadError> { + if data.is_empty() { + return Ok(Vec::new()); + } + if data.len() < 4 { + return Err(LoadError::MalformedSection); + } + let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + let mut functions = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos + 16 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let code_offset = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + let code_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()); + let param_slots = u16::from_le_bytes(data[pos+8..pos+10].try_into().unwrap()); + let local_slots = u16::from_le_bytes(data[pos+10..pos+12].try_into().unwrap()); + let return_slots = u16::from_le_bytes(data[pos+12..pos+14].try_into().unwrap()); + let max_stack_slots = 
u16::from_le_bytes(data[pos+14..pos+16].try_into().unwrap()); + + functions.push(FunctionMeta { + code_offset, + code_len, + param_slots, + local_slots, + return_slots, + max_stack_slots, + }); + pos += 16; + } + Ok(functions) +} + +fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> { + for func in &module.functions { + // Opcode stream bounds + if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() { + return Err(LoadError::InvalidFunctionIndex); + } + } + + // Basic opcode scan for const pool indices + let mut pos = 0; + while pos < module.code.len() { + if pos + 2 > module.code.len() { + break; // Unexpected EOF in middle of opcode, maybe should be error + } + let op_val = u16::from_le_bytes([module.code[pos], module.code[pos+1]]); + let opcode = OpCode::try_from(op_val).map_err(|_| LoadError::InvalidOpcode)?; + pos += 2; + + match opcode { + OpCode::PushConst => { + if pos + 4 > module.code.len() { return Err(LoadError::UnexpectedEof); } + let idx = u32::from_le_bytes(module.code[pos..pos+4].try_into().unwrap()) as usize; + if idx >= module.const_pool.len() { + return Err(LoadError::InvalidConstIndex); + } + pos += 4; + } + OpCode::PushI32 | OpCode::PushBounded | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue + | OpCode::GetGlobal | OpCode::SetGlobal | OpCode::GetLocal | OpCode::SetLocal + | OpCode::PopN | OpCode::Syscall | OpCode::GateLoad | OpCode::GateStore => { + pos += 4; + } + OpCode::PushI64 | OpCode::PushF64 => { + pos += 8; + } + OpCode::PushBool => { + pos += 1; + } + OpCode::Call | OpCode::Alloc => { + pos += 8; + } + _ => {} + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_header(section_count: u32) -> Vec { + let mut h = vec![0u8; 32]; + h[0..4].copy_from_slice(b"PBS\0"); + h[4..6].copy_from_slice(&0u16.to_le_bytes()); // version + h[6] = 0; // endianness + h[8..12].copy_from_slice(§ion_count.to_le_bytes()); + h + } + + #[test] + fn test_invalid_magic() { + let mut data 
= create_header(0); + data[0] = b'X'; + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidMagic)); + } + + #[test] + fn test_invalid_version() { + let mut data = create_header(0); + data[4] = 1; + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidVersion)); + } + + #[test] + fn test_invalid_endianness() { + let mut data = create_header(0); + data[6] = 1; + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidEndianness)); + } + + #[test] + fn test_overlapping_sections() { + let mut data = create_header(2); + // Section 1: Kind 0, Offset 64, Length 32 + data.extend_from_slice(&0u32.to_le_bytes()); + data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&32u32.to_le_bytes()); + // Section 2: Kind 1, Offset 80, Length 32 (Overlaps with Section 1) + data.extend_from_slice(&1u32.to_le_bytes()); + data.extend_from_slice(&80u32.to_le_bytes()); + data.extend_from_slice(&32u32.to_le_bytes()); + + // Ensure data is long enough for the offsets + data.resize(256, 0); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::OverlappingSections)); + } + + #[test] + fn test_section_out_of_bounds() { + let mut data = create_header(1); + // Section 1: Kind 0, Offset 64, Length 1000 + data.extend_from_slice(&0u32.to_le_bytes()); + data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&1000u32.to_le_bytes()); + + data.resize(256, 0); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::SectionOutOfBounds)); + } + + #[test] + fn test_invalid_function_code_offset() { + let mut data = create_header(2); + // Section 1: Functions, Kind 1, Offset 64, Length 20 (Header 4 + 1 entry 16) + data.extend_from_slice(&1u32.to_le_bytes()); + data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&20u32.to_le_bytes()); + + // Section 2: Code, Kind 2, Offset 128, Length 10 + data.extend_from_slice(&2u32.to_le_bytes()); + data.extend_from_slice(&128u32.to_le_bytes()); + 
data.extend_from_slice(&10u32.to_le_bytes()); + + data.resize(256, 0); + + // Setup functions section + let func_data_start = 64; + data[func_data_start..func_data_start+4].copy_from_slice(&1u32.to_le_bytes()); // 1 function + let entry_start = func_data_start + 4; + data[entry_start..entry_start+4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5 + data[entry_start+4..entry_start+8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10 + // 5 + 10 = 15 > 10 (code section length) + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex)); + } + + #[test] + fn test_invalid_const_index() { + let mut data = create_header(2); + // Section 1: Const Pool, Kind 0, Offset 64, Length 4 (Empty CP) + data.extend_from_slice(&0u32.to_le_bytes()); + data.extend_from_slice(&64u32.to_le_bytes()); + data.extend_from_slice(&4u32.to_le_bytes()); + + // Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0) + data.extend_from_slice(&2u32.to_le_bytes()); + data.extend_from_slice(&128u32.to_le_bytes()); + data.extend_from_slice(&6u32.to_le_bytes()); + + data.resize(256, 0); + + // Setup empty CP + data[64..68].copy_from_slice(&0u32.to_le_bytes()); + + // Setup code with PushConst 0 + data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes()); + data[130..134].copy_from_slice(&0u32.to_le_bytes()); + + assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex)); + } + + #[test] + fn test_valid_minimal_load() { + let data = create_header(0); + let module = BytecodeLoader::load(&data).unwrap(); + assert_eq!(module.version, 0); + assert!(module.const_pool.is_empty()); + assert!(module.functions.is_empty()); + assert!(module.code.is_empty()); + } +} diff --git a/crates/prometeu-core/src/prometeu_os/prometeu_os.rs b/crates/prometeu-core/src/prometeu_os/prometeu_os.rs index 936ac19f..b6c74cb3 100644 --- a/crates/prometeu-core/src/prometeu_os/prometeu_os.rs +++ b/crates/prometeu-core/src/prometeu_os/prometeu_os.rs @@ -162,14 
+162,21 @@ impl PrometeuOS { /// Loads a cartridge into the PVM and resets the execution state. pub fn initialize_vm(&mut self, vm: &mut VirtualMachine, cartridge: &Cartridge) { - vm.initialize(cartridge.program.clone(), &cartridge.entrypoint); - - // Determines the numeric app_id - self.current_app_id = cartridge.app_id; - self.current_cartridge_title = cartridge.title.clone(); - self.current_cartridge_app_version = cartridge.app_version.clone(); - self.current_cartridge_app_mode = cartridge.app_mode; - self.current_entrypoint = cartridge.entrypoint.clone(); + match vm.initialize(cartridge.program.clone(), &cartridge.entrypoint) { + Ok(_) => { + // Determines the numeric app_id + self.current_app_id = cartridge.app_id; + self.current_cartridge_title = cartridge.title.clone(); + self.current_cartridge_app_version = cartridge.app_version.clone(); + self.current_cartridge_app_mode = cartridge.app_mode; + self.current_entrypoint = cartridge.entrypoint.clone(); + } + Err(e) => { + self.log(LogLevel::Error, LogSource::Vm, 0, format!("Failed to initialize VM: {:?}", e)); + // Fail fast: no program is installed, no app id is switched. + // We don't update current_app_id or other fields. + } + } } /// Executes a single VM instruction (Debug). 
@@ -427,7 +434,11 @@ mod tests { let mut hw = Hardware::new(); let signals = InputSignals::default(); - let rom = vec![0x02, 0x00, 0x00, 0x00, 0x00, 0x00]; + let rom = prometeu_bytecode::pbc::write_pbc(&prometeu_bytecode::pbc::PbcFile { + version: 0, + cp: vec![], + rom: vec![0x02, 0x00, 0x00, 0x00, 0x00, 0x00], + }).unwrap(); let cartridge = Cartridge { app_id: 1234, title: "test".to_string(), @@ -466,10 +477,14 @@ mod tests { // PUSH_CONST 0 (dummy) // FrameSync (0x80) // JMP 0 - let rom = vec![ - 0x80, 0x00, // FrameSync (2 bytes opcode) - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // JMP 0 (2 bytes opcode + 4 bytes u32) - ]; + let rom = prometeu_bytecode::pbc::write_pbc(&prometeu_bytecode::pbc::PbcFile { + version: 0, + cp: vec![], + rom: vec![ + 0x80, 0x00, // FrameSync (2 bytes opcode) + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, // JMP 0 (2 bytes opcode + 4 bytes u32) + ], + }).unwrap(); let cartridge = Cartridge { app_id: 1234, title: "test".to_string(), @@ -665,11 +680,15 @@ mod tests { let signals = InputSignals::default(); // PushI32 0 (0x17), then Ret (0x51) - let rom = vec![ - 0x17, 0x00, // PushI32 - 0x00, 0x00, 0x00, 0x00, // value 0 - 0x51, 0x00 // Ret - ]; + let rom = prometeu_bytecode::pbc::write_pbc(&prometeu_bytecode::pbc::PbcFile { + version: 0, + cp: vec![], + rom: vec![ + 0x17, 0x00, // PushI32 + 0x00, 0x00, 0x00, 0x00, // value 0 + 0x51, 0x00 // Ret + ], + }).unwrap(); let cartridge = Cartridge { app_id: 1234, title: "test".to_string(), diff --git a/crates/prometeu-core/src/virtual_machine/mod.rs b/crates/prometeu-core/src/virtual_machine/mod.rs index 90525df1..144cbcea 100644 --- a/crates/prometeu-core/src/virtual_machine/mod.rs +++ b/crates/prometeu-core/src/virtual_machine/mod.rs @@ -19,6 +19,15 @@ pub enum VmFault { Panic(String), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VmInitError { + InvalidFormat, + UnsupportedFormat, + PpbcParseFailed, + PbsV0LoadFailed(prometeu_bytecode::v0::LoadError), + EntrypointNotFound, +} + pub struct 
HostReturn<'a> { stack: &'a mut Vec } diff --git a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs index 91258b87..80ee1332 100644 --- a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs +++ b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs @@ -2,7 +2,7 @@ use crate::hardware::HardwareBridge; use crate::virtual_machine::call_frame::CallFrame; use crate::virtual_machine::scope_frame::ScopeFrame; use crate::virtual_machine::value::Value; -use crate::virtual_machine::{NativeInterface, Program}; +use crate::virtual_machine::{NativeInterface, Program, VmInitError}; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::pbc::{self, ConstantPoolEntry}; use prometeu_bytecode::abi::{TrapInfo, TRAP_OOB}; @@ -98,44 +98,72 @@ impl VirtualMachine { /// Resets the VM state and loads a new program. /// This is typically called by the Firmware when starting a new App/Cartridge. - pub fn initialize(&mut self, program_bytes: Vec, entrypoint: &str) { - // PBC (Prometeu ByteCode) is a binary format that includes a header, - // constant pool, and the raw ROM (bytecode). - if program_bytes.starts_with(b"PPBC") { - if let Ok(pbc_file) = pbc::parse_pbc(&program_bytes) { - let cp = pbc_file.cp.into_iter().map(|entry| match entry { - ConstantPoolEntry::Int32(v) => Value::Int32(v), - ConstantPoolEntry::Int64(v) => Value::Int64(v), - ConstantPoolEntry::Float64(v) => Value::Float(v), - ConstantPoolEntry::Boolean(v) => Value::Boolean(v), - ConstantPoolEntry::String(v) => Value::String(v), - ConstantPoolEntry::Null => Value::Null, - }).collect(); - self.program = Program::new(pbc_file.rom, cp); - } else { - // Fallback for raw bytes if PBC parsing fails - self.program = Program::new(program_bytes, vec![]); - } - } else { - // If it doesn't have the PPBC signature, treat it as raw bytecode. - self.program = Program::new(program_bytes, vec![]); - } - - // Resolve the entrypoint. 
Currently supports numeric addresses. - if let Ok(addr) = entrypoint.parse::() { - self.pc = addr; - } else { - self.pc = 0; - } - - // Full state reset to ensure a clean start for the App + pub fn initialize(&mut self, program_bytes: Vec, entrypoint: &str) -> Result<(), VmInitError> { + // Fail fast: reset state upfront. If we return early with an error, + // the VM is left in a "halted and empty" state. + self.program = Program::default(); + self.pc = 0; self.operand_stack.clear(); self.call_stack.clear(); self.scope_stack.clear(); self.globals.clear(); self.heap.clear(); self.cycles = 0; - self.halted = false; + self.halted = true; // execution is impossible until successful load + + // Only recognized formats are loadable. + let program = if program_bytes.starts_with(b"PPBC") { + // PBC (Prometeu ByteCode) legacy format + let pbc_file = pbc::parse_pbc(&program_bytes).map_err(|_| VmInitError::PpbcParseFailed)?; + let cp = pbc_file.cp.into_iter().map(|entry| match entry { + ConstantPoolEntry::Int32(v) => Value::Int32(v), + ConstantPoolEntry::Int64(v) => Value::Int64(v), + ConstantPoolEntry::Float64(v) => Value::Float(v), + ConstantPoolEntry::Boolean(v) => Value::Boolean(v), + ConstantPoolEntry::String(v) => Value::String(v), + ConstantPoolEntry::Null => Value::Null, + }).collect(); + Program::new(pbc_file.rom, cp) + } else if program_bytes.starts_with(b"PBS\0") { + // PBS v0 industrial format + match prometeu_bytecode::v0::BytecodeLoader::load(&program_bytes) { + Ok(module) => { + let cp = module.const_pool.into_iter().map(|entry| match entry { + ConstantPoolEntry::Int32(v) => Value::Int32(v), + ConstantPoolEntry::Int64(v) => Value::Int64(v), + ConstantPoolEntry::Float64(v) => Value::Float(v), + ConstantPoolEntry::Boolean(v) => Value::Boolean(v), + ConstantPoolEntry::String(v) => Value::String(v), + ConstantPoolEntry::Null => Value::Null, + }).collect(); + Program::new(module.code, cp) + } + Err(prometeu_bytecode::v0::LoadError::InvalidVersion) => return 
Err(VmInitError::UnsupportedFormat), + Err(e) => { + return Err(VmInitError::PbsV0LoadFailed(e)); + } + } + } else { + return Err(VmInitError::InvalidFormat); + }; + + // Resolve the entrypoint. Currently supports numeric addresses or empty (defaults to 0). + let pc = if entrypoint.is_empty() { + 0 + } else { + let addr = entrypoint.parse::().map_err(|_| VmInitError::EntrypointNotFound)?; + if addr >= program.rom.len() && (addr > 0 || !program.rom.is_empty()) { + return Err(VmInitError::EntrypointNotFound); + } + addr + }; + + // Finalize initialization by applying the new program and PC. + self.program = program; + self.pc = pc; + self.halted = false; // Successfully loaded, execution is now possible + + Ok(()) } /// Prepares the VM to execute a specific entrypoint by setting the PC and @@ -1522,4 +1550,69 @@ mod tests { _ => panic!("Expected Trap"), } } + + #[test] + fn test_loader_hardening_invalid_magic() { + let mut vm = VirtualMachine::default(); + let res = vm.initialize(vec![0, 0, 0, 0], ""); + assert_eq!(res, Err(VmInitError::InvalidFormat)); + // VM should remain empty + assert_eq!(vm.program.rom.len(), 0); + } + + #[test] + fn test_loader_hardening_unsupported_version() { + let mut vm = VirtualMachine::default(); + let mut header = vec![0u8; 32]; + header[0..4].copy_from_slice(b"PBS\0"); + header[4..6].copy_from_slice(&1u16.to_le_bytes()); // version 1 (unsupported) + + let res = vm.initialize(header, ""); + assert_eq!(res, Err(VmInitError::UnsupportedFormat)); + } + + #[test] + fn test_loader_hardening_malformed_pbs_v0() { + let mut vm = VirtualMachine::default(); + let mut header = vec![0u8; 32]; + header[0..4].copy_from_slice(b"PBS\0"); + header[8..12].copy_from_slice(&1u32.to_le_bytes()); // 1 section claimed but none provided + + let res = vm.initialize(header, ""); + match res { + Err(VmInitError::PbsV0LoadFailed(prometeu_bytecode::v0::LoadError::UnexpectedEof)) => {}, + _ => panic!("Expected PbsV0LoadFailed(UnexpectedEof), got {:?}", res), + } + } 
+ + #[test] + fn test_loader_hardening_entrypoint_not_found() { + let mut vm = VirtualMachine::default(); + // Valid empty PBS v0 module + let mut header = vec![0u8; 32]; + header[0..4].copy_from_slice(b"PBS\0"); + + // Try to initialize with numeric entrypoint 10 (out of bounds for empty ROM) + let res = vm.initialize(header, "10"); + assert_eq!(res, Err(VmInitError::EntrypointNotFound)); + + // VM state should not be updated + assert_eq!(vm.pc, 0); + assert_eq!(vm.program.rom.len(), 0); + } + + #[test] + fn test_loader_hardening_successful_init() { + let mut vm = VirtualMachine::default(); + vm.pc = 123; // Pollution + + let mut header = vec![0u8; 32]; + header[0..4].copy_from_slice(b"PBS\0"); + + let res = vm.initialize(header, ""); + assert!(res.is_ok()); + assert_eq!(vm.pc, 0); + assert_eq!(vm.program.rom.len(), 0); + assert_eq!(vm.cycles, 0); + } } diff --git a/docs/specs/pbs/files/PRs para Junie Global.md b/docs/specs/pbs/files/PRs para Junie Global.md index e69de29b..2ac0ae0d 100644 --- a/docs/specs/pbs/files/PRs para Junie Global.md +++ b/docs/specs/pbs/files/PRs para Junie Global.md @@ -0,0 +1,23 @@ +# VM PR Plan — PBS v0 Executable (Industrial Baseline) + +> **Goal:** make *all PBS v0 functionality* executable on the VM with **deterministic semantics**, **closed stack/locals contract**, **stable ABI**, and **integration-grade tests**. +> +> **Non-goal:** new language features. If something must be reworked to achieve industrial quality, it *must* be reworked. + +--- + +## Guiding invariants (apply to every PR) + +### VM invariants + +1. **Every opcode has an explicit stack effect**: `pop_n → push_m` (in *slots*, not “values”). +2. **Frames are explicit**: params/locals/operand stack are separate or formally delimited. +3. **No implicit behavior**: if it isn’t encoded in bytecode or runtime state, it doesn’t exist. +4. **Deterministic traps** only (no UB): trap includes `trap_code`, `pc`, `opcode`, and (if present) `span`. +5. 
**Bytecode stability**: versioned format; opcodes are immutable once marked v0. + +### Compiler/VM boundary invariants + +1. **Types map to slot counts** deterministically (including flattened SAFE structs and multi-slot returns). +2. **Calling convention is frozen**: param order, return slots, caller/callee responsibilities. +3. **Imports are compile/link-time only**; VM runs a fully-linked program image. \ No newline at end of file diff --git a/docs/specs/pbs/files/PRs para Junie.md b/docs/specs/pbs/files/PRs para Junie.md index e69de29b..9a9661a4 100644 --- a/docs/specs/pbs/files/PRs para Junie.md +++ b/docs/specs/pbs/files/PRs para Junie.md @@ -0,0 +1,374 @@ +## PR-03 — Frame model v0: locals, operand stack, and function metadata + +**Why:** `let x: int = 1` failing usually means locals/frames are not modeled correctly. + +### Scope + +* Define `FunctionMeta`: + + * `code_offset`, `code_len` + * `param_slots`, `local_slots`, `return_slots` + * `max_stack_slots` (computed by verifier or compiler) +* Define `Frame`: + + * `base` (stack base index) + * `locals_base` (or equivalent) + * `return_slots` + * `pc_return` +* Decide representation: + + * Option A (recommended v0): **single VM stack** with fixed layout per frame: + + * `[args][locals][operand_stack...]` + * Use `base + local_index` addressing. + +### Deliverables + +* `CallStack` with `Vec<Frame>` +* `enter_frame(meta)` allocates locals area (zero-init) +* `leave_frame()` reclaims to previous base + +### Tests + +* locals are isolated per call +* locals are zero-initialized +* stack is restored exactly after return + +### Acceptance + +* Locals are deterministic and independent from operand stack usage. + +--- + +## PR-04 — Locals opcodes: GET_LOCAL / SET_LOCAL / INIT_LOCAL + +**Why:** PBS `let` and parameters need first-class support.
+ +### Scope + +* Implement opcodes: + + * `GET_LOCAL <slot>` pushes value slots + * `SET_LOCAL <slot>` pops value slots and writes + * `INIT_LOCAL <slot>` (optional) for explicit initialization semantics +* Enforce bounds: local slot index must be within `[0..param+local_slots)` +* Enforce slot width: if types are multi-slot, compiler emits multiple GET/SET or uses `*_N` variants. + +### Deliverables + +* `LocalAddressing` utilities +* Deterministic trap codes: + + * `TRAP_INVALID_LOCAL` + * `TRAP_LOCAL_WIDTH_MISMATCH` (if enforced) + +### Tests + +* `let x: int = 1; return x;` works +* invalid local index traps + +### Acceptance + +* `let` works reliably; no stack side effects beyond specified pops/pushes. + +--- + +## PR-05 — Core arithmetic + comparisons in VM (int/bounded/bool) + +**Why:** The minimal executable PBS needs arithmetic that doesn’t corrupt the stack. + +### Scope + +* Implement v0 numeric opcodes (slot-safe): + + * `IADD, ISUB, IMUL, IDIV, IMOD` + * `ICMP_EQ, ICMP_NE, ICMP_LT, ICMP_LE, ICMP_GT, ICMP_GE` + * `BADD, BSUB, ...` (or unify with tagged values) +* Define conversion opcodes if lowering expects them: + + * `BOUND_TO_INT`, `INT_TO_BOUND_CHECKED` (trap OOB) + +### Deliverables + +* Deterministic traps: + + * `TRAP_DIV_ZERO` + * `TRAP_OOB` (bounded checks) + +### Tests + +* simple arithmetic chain +* div by zero traps +* bounded conversions trap on overflow + +### Acceptance + +* Arithmetic and comparisons are closed and verified. + +--- + +## PR-06 — Control flow opcodes: jumps, conditional branches, structured “if” + +**Why:** `if` must be predictable and verifier-safe. + +### Scope + +* Implement opcodes: + + * `JMP <target>` + * `JMP_IF_TRUE <target>` + * `JMP_IF_FALSE <target>` +* Verifier rules: + + * targets must be valid instruction boundaries + * stack height at join points must match + +### Tests + +* nested if +* if with empty branches +* branch join mismatch rejected + +### Acceptance + +* Control flow is safe; no implicit stack juggling.
+ +--- + +## PR-07 — Calling convention v0: CALL / RET / multi-slot returns + +**Why:** Without a correct call model, PBS isn’t executable. + +### Scope + +* Introduce `CALL ` + + * caller pushes args (slots) + * callee frame allocates locals +* Introduce `RET` + + * callee must leave exactly `return_slots` on operand stack at `RET` + * VM pops frame and transfers return slots to caller +* Define return mechanics for `void` (`return_slots=0`) + +### Deliverables + +* `FunctionTable` indexing and bounds checks +* Deterministic traps: + + * `TRAP_INVALID_FUNC` + * `TRAP_BAD_RET_SLOTS` + +### Tests + +* `fn add(a:int,b:int):int { return a+b; }` +* multi-slot return (e.g., `Pad` flattened) +* void call + +### Acceptance + +* Calls are stable and stack-clean. + +--- + +## PR-08 — Host syscalls v0: stable ABI, multi-slot args/returns + +**Why:** PBS relies on deterministic syscalls; ABI must be frozen and enforced. + +### Scope + +* Unify syscall invocation opcode: + + * `SYSCALL ` +* Runtime validates: + + * pops `arg_slots` + * pushes `ret_slots` +* Implement/confirm: + + * `GfxClear565 (0x1010)` + * `InputPadSnapshot (0x2010)` + * `InputTouchSnapshot (0x2011)` + +### Deliverables + +* A `SyscallRegistry` mapping id -> handler + signature +* Deterministic traps: + + * `TRAP_INVALID_SYSCALL` + * `TRAP_SYSCALL_SIG_MISMATCH` + +### Tests + +* syscall isolated tests +* wrong signature traps + +### Acceptance + +* Syscalls are “industrial”: typed by signature, deterministic, no host surprises. + +--- + +## PR-09 — Debug info v0: spans, symbols, and traceable traps + +**Why:** Industrial debugging requires actionable failures. + +### Scope + +* Add optional debug section: + + * per-instruction span table (`pc -> (file_id, start, end)`) + * function names +* Enhance trap payload with debug span (if present) + +### Tests + +* trap includes span when debug present +* trap still works without debug + +### Acceptance + +* You can pinpoint “where” a trap happened reliably. 
+ +--- + +## PR-10 — Program image + linker: imports/exports resolved before VM run + +**Why:** Imports are compile-time, but we need an industrial linking model for multi-module PBS. + +### Scope + +* Define in bytecode: + + * `exports`: symbol -> func_id/service entry (as needed) + * `imports`: symbol refs -> relocation slots +* Implement a **linker** that: + + * builds a `ProgramImage` from N modules + * resolves imports to exports + * produces a single final `FunctionTable` and code blob + +### Notes + +* VM **does not** do name lookup at runtime. +* Linking errors are deterministic: `LINK_UNRESOLVED_SYMBOL`, `LINK_DUP_EXPORT`, etc. + +### Tests + +* two-module link success +* unresolved import fails +* duplicate export fails + +### Acceptance + +* Multi-module PBS works; “import” is operationalized correctly. + +--- + +## PR-11 — Canonical integration cartridge + golden bytecode snapshots + +**Why:** One cartridge must be the unbreakable reference. + +### Scope + +* Create `CartridgeCanonical.pbs` that covers: + + * locals + * arithmetic + * if + * function call + * syscall clear + * input snapshot +* Add `golden` artifacts: + + * canonical AST JSON (frontend) + * IR Core (optional) + * IR VM / bytecode dump + * expected VM trace (optional) + +### Tests + +* CI runs cartridge and checks: + + * no traps + * deterministic output state + +### Acceptance + +* This cartridge is the “VM heartbeat test”. + +--- + +## PR-12 — VM test harness: stepper, trace, and property tests + +**Why:** Industrial quality means test tooling, not just “it runs”. + +### Scope + +* Add `VmRunner` test harness: + + * step limit + * deterministic trace of stack deltas + * snapshot of locals +* Add property tests (lightweight): + + * stack never underflows in verified programs + * verified programs never jump out of bounds + +### Acceptance + +* Debugging is fast, and regressions are caught. 
+ +--- + +## PR-13 — Optional: Refactor Value representation (tagged slots) for clarity + +**Why:** If current `Value` representation is the source of complexity/bugs, refactor now. + +### Scope (only if needed) + +* Make `Slot` explicit: + + * `Slot::I32`, `Slot::I64`, `Slot::U32`, `Slot::Bool`, `Slot::ConstId`, `Slot::GateId`, `Slot::Unit` +* Multi-slot types become sequences of slots. + +### Acceptance + +* Simpler, more verifiable runtime. + +--- + +# Work split (what can be parallel later) + +* VM core correctness: PR-01..PR-08 (sequential, contract-first) +* Debug + tooling: PR-09, PR-12 (parallel after PR-03) +* Linking/imports: PR-10 (parallel after PR-01) +* Canonical cartridge: PR-11 (parallel after PR-05) + +--- + +# “Stop the line” rules + +1. If a PR introduces an opcode without stack spec + verifier integration, it’s rejected. +2. If a PR changes bytecode layout without bumping version, it’s rejected. +3. If a PR adds a feature before the canonical cartridge passes, it’s rejected. + +--- + +# First implementation target (tomorrow morning, start here) + +**Start with PR-02 (Opcode spec + verifier)** even if you think you already know the bug. +Once the verifier exists, the rest becomes mechanical: every failure becomes *actionable*. + +## Definition of Done (DoD) for PBS v0 “minimum executable” + +A single canonical cartridge runs end-to-end: + +* `let` declarations (locals) +* arithmetic (+, -, *, /, %, comparisons) +* `if/else` control flow +* `when` expression (if present in lowering) +* function calls with params + returns (including `void`) +* multiple return slots (flattened structs / hardware value types) +* host syscalls (e.g., `GfxClear565`, `InputPadSnapshot`, `InputTouchSnapshot`) +* deterministic traps (OOB bounded, invalid local, invalid call target, stack underflow) \ No newline at end of file