diff --git a/crates/prometeu-bytecode/src/abi.rs b/crates/prometeu-bytecode/src/abi.rs index 6feb351a..88445f5c 100644 --- a/crates/prometeu-bytecode/src/abi.rs +++ b/crates/prometeu-bytecode/src/abi.rs @@ -50,6 +50,14 @@ pub const TRAP_INVALID_FUNC: u32 = 0x0000_000B; /// Executed RET with an incorrect stack height (mismatch with function metadata). pub const TRAP_BAD_RET_SLOTS: u32 = 0x0000_000C; +/// Detailed information about a source code span. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub struct SourceSpan { + pub file_id: u32, + pub start: u32, + pub end: u32, +} + /// Detailed information about a runtime trap. #[derive(Debug, Clone, PartialEq, Eq)] pub struct TrapInfo { @@ -61,6 +69,8 @@ pub struct TrapInfo { pub message: String, /// The absolute Program Counter (PC) address where the trap occurred. pub pc: u32, + /// Optional source span information if debug symbols are available. + pub span: Option, } /// Checks if an instruction is a jump (branch) instruction. diff --git a/crates/prometeu-bytecode/src/v0/mod.rs b/crates/prometeu-bytecode/src/v0/mod.rs index 9f8a1486..21b4b06a 100644 --- a/crates/prometeu-bytecode/src/v0/mod.rs +++ b/crates/prometeu-bytecode/src/v0/mod.rs @@ -1,5 +1,6 @@ use crate::pbc::ConstantPoolEntry; use crate::opcode::OpCode; +use crate::abi::SourceSpan; #[derive(Debug, Clone, PartialEq, Eq)] pub enum LoadError { @@ -26,12 +27,33 @@ pub struct FunctionMeta { pub max_stack_slots: u16, } +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct DebugInfo { + pub pc_to_span: Vec<(u32, SourceSpan)>, // Sorted by PC + pub function_names: Vec<(u32, String)>, // (func_idx, name) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Export { + pub symbol: String, + pub func_idx: u32, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Import { + pub symbol: String, + pub relocation_pcs: Vec, +} + #[derive(Debug, Clone, PartialEq)] pub struct BytecodeModule { pub version: u16, pub const_pool: Vec, pub functions: Vec, pub code: Vec, + pub debug_info: Option, + pub exports: Vec, + pub imports: Vec, } pub struct BytecodeLoader; @@ -95,6 +117,9 @@ impl BytecodeLoader { const_pool: Vec::new(), functions: Vec::new(), code: Vec::new(), + debug_info: None, + exports: Vec::new(), + imports: Vec::new(), }; for (kind, offset, length) in sections { @@ -109,7 +134,16 @@ impl BytecodeLoader { 2 => { // Code module.code = section_data.to_vec(); } - _ => {} // Skip unknown or optional sections like Debug, Exports, Imports for now + 3 => { // Debug Info + module.debug_info = Some(parse_debug_section(section_data)?); + } + 4 => { // Exports + module.exports = parse_exports(section_data)?; + } + 5 => { // Imports + module.imports = parse_imports(section_data)?; + } + _ => {} // Skip unknown or optional sections } } @@ -212,6 +246,125 @@ fn parse_functions(data: &[u8]) -> Result, LoadError> { Ok(functions) } +fn parse_debug_section(data: &[u8]) -> Result { + if data.is_empty() { + return Ok(DebugInfo::default()); + } + if data.len() < 8 { + return Err(LoadError::MalformedSection); + } + + let mut pos = 0; + + // PC to Span table + let span_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; + pos += 4; + let mut pc_to_span = Vec::with_capacity(span_count); + for _ in 0..span_count { + if pos + 16 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let pc = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + let file_id = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()); + let start = u32::from_le_bytes(data[pos+8..pos+12].try_into().unwrap()); + let end = u32::from_le_bytes(data[pos+12..pos+16].try_into().unwrap()); + pc_to_span.push((pc, SourceSpan { file_id, start, end })); + pos += 16; + } + + // Function names table + if pos + 4 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let func_name_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; + pos += 4; + let mut function_names = Vec::with_capacity(func_name_count); + for _ in 0..func_name_count { + if pos + 8 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let func_idx = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; + pos += 8; + if pos + name_len > data.len() { + return Err(LoadError::UnexpectedEof); + } + let name = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); + function_names.push((func_idx, name)); + pos += name_len; + } + + Ok(DebugInfo { pc_to_span, function_names }) +} + +fn parse_exports(data: &[u8]) -> Result, LoadError> { + if data.is_empty() { + return Ok(Vec::new()); + } + if data.len() < 4 { + return Err(LoadError::MalformedSection); + } + let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + let mut exports = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos + 8 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let func_idx = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; + pos += 8; + if pos + name_len > data.len() { + return Err(LoadError::UnexpectedEof); + } + let symbol = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); + exports.push(Export { symbol, func_idx }); + pos += name_len; + } + Ok(exports) +} + +fn parse_imports(data: &[u8]) -> Result, LoadError> { + if data.is_empty() { + return Ok(Vec::new()); + } + if data.len() < 4 { + return Err(LoadError::MalformedSection); + } + let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; + let mut imports = Vec::with_capacity(count); + let mut pos = 4; + + for _ in 0..count { + if pos + 8 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let relocation_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; + let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; + pos += 8; + + if pos + name_len > data.len() { + return Err(LoadError::UnexpectedEof); + } + let symbol = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); + pos += name_len; + + if pos + relocation_count * 4 > data.len() { + return Err(LoadError::UnexpectedEof); + } + let mut relocation_pcs = Vec::with_capacity(relocation_count); + for _ in 0..relocation_count { + let pc = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); + relocation_pcs.push(pc); + pos += 4; + } + + imports.push(Import { symbol, relocation_pcs }); + } + Ok(imports) +} + fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> { for func in &module.functions { // Opcode stream bounds diff --git a/crates/prometeu-core/src/virtual_machine/linker.rs b/crates/prometeu-core/src/virtual_machine/linker.rs new file mode 100644 index 00000000..9c6e3c35 --- /dev/null +++ b/crates/prometeu-core/src/virtual_machine/linker.rs @@ -0,0 +1,294 @@ +use crate::virtual_machine::{ProgramImage, Value}; +use prometeu_bytecode::v0::{BytecodeModule, DebugInfo}; +use prometeu_bytecode::pbc::ConstantPoolEntry; +use prometeu_bytecode::opcode::OpCode; +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum LinkError { + UnresolvedSymbol(String), + DuplicateExport(String), +} + +pub struct Linker; + +impl Linker { + pub fn link(modules: &[BytecodeModule]) -> Result { + let mut combined_code = Vec::new(); + let mut combined_functions = Vec::new(); + let mut combined_constants = Vec::new(); + let mut combined_debug_pc_to_span = Vec::new(); + let mut combined_debug_function_names = Vec::new(); + + let mut exports = HashMap::new(); + + // Offset mapping for each module + let mut module_code_offsets = Vec::with_capacity(modules.len()); + let mut module_function_offsets = Vec::with_capacity(modules.len()); + + // First pass: collect exports and calculate offsets + for module in modules { + let code_offset = combined_code.len() as u32; + let function_offset = combined_functions.len() as u32; + + module_code_offsets.push(code_offset); + module_function_offsets.push(function_offset); + + for export in &module.exports { + if exports.contains_key(&export.symbol) { + return Err(LinkError::DuplicateExport(export.symbol.clone())); + } + exports.insert(export.symbol.clone(), (function_offset + export.func_idx) as u32); + } + + combined_code.extend_from_slice(&module.code); + + for func in &module.functions { + let mut linked_func = func.clone(); + linked_func.code_offset += code_offset; + combined_functions.push(linked_func); + } + } + + // Second pass: resolve imports and relocate constants/code + for (i, module) in modules.iter().enumerate() { + let code_offset = module_code_offsets[i] as usize; + let const_base = combined_constants.len() as u32; + + // Relocate constant pool entries for this module + for entry in &module.const_pool { + combined_constants.push(match entry { + ConstantPoolEntry::Int32(v) => Value::Int32(*v), + ConstantPoolEntry::Int64(v) => Value::Int64(*v), + ConstantPoolEntry::Float64(v) => Value::Float(*v), + ConstantPoolEntry::Boolean(v) => Value::Boolean(*v), + ConstantPoolEntry::String(v) => Value::String(v.clone()), + ConstantPoolEntry::Null => Value::Null, + }); + } + + // Patch relocations for imports + for import in &module.imports { + let target_func_idx = exports.get(&import.symbol) + .ok_or_else(|| LinkError::UnresolvedSymbol(import.symbol.clone()))?; + + for &reloc_pc in &import.relocation_pcs { + let absolute_pc = code_offset + reloc_pc as usize; + // CALL opcode is 2 bytes, immediate is next 4 bytes + let imm_offset = absolute_pc + 2; + if imm_offset + 4 <= combined_code.len() { + let bytes = target_func_idx.to_le_bytes(); + combined_code[imm_offset..imm_offset+4].copy_from_slice(&bytes); + } + } + } + + // Relocate PUSH_CONST instructions + if const_base > 0 { + let mut pos = code_offset; + let end = code_offset + module.code.len(); + while pos < end { + if pos + 2 > end { break; } + let op_val = u16::from_le_bytes([combined_code[pos], combined_code[pos+1]]); + let opcode = match OpCode::try_from(op_val) { + Ok(op) => op, + Err(_) => { + pos += 2; + continue; + } + }; + pos += 2; + + match opcode { + OpCode::PushConst => { + if pos + 4 <= end { + let old_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()); + let new_idx = old_idx + const_base; + combined_code[pos..pos+4].copy_from_slice(&new_idx.to_le_bytes()); + pos += 4; + } + } + OpCode::PushI32 | OpCode::PushBounded | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue + | OpCode::GetGlobal | OpCode::SetGlobal | OpCode::GetLocal | OpCode::SetLocal + | OpCode::PopN | OpCode::Syscall | OpCode::GateLoad | OpCode::GateStore | OpCode::Call => { + pos += 4; + } + OpCode::PushI64 | OpCode::PushF64 | OpCode::Alloc => { + pos += 8; + } + OpCode::PushBool => { + pos += 1; + } + _ => {} + } + } + } + + // Handle debug info + if let Some(debug_info) = &module.debug_info { + for (pc, span) in &debug_info.pc_to_span { + combined_debug_pc_to_span.push((pc + module_code_offsets[i], span.clone())); + } + for (func_idx, name) in &debug_info.function_names { + combined_debug_function_names.push((func_idx + module_function_offsets[i], name.clone())); + } + } + } + + let debug_info = if !combined_debug_pc_to_span.is_empty() { + Some(DebugInfo { + pc_to_span: combined_debug_pc_to_span, + function_names: combined_debug_function_names, + }) + } else { + None + }; + + Ok(ProgramImage::new( + combined_code, + combined_constants, + combined_functions, + debug_info, + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use prometeu_bytecode::v0::{BytecodeModule, FunctionMeta, Export, Import}; + use prometeu_bytecode::opcode::OpCode; + + #[test] + fn test_linker_basic() { + // Module 1: defines 'foo', calls 'bar' + let mut code1 = Vec::new(); + // Function 'foo' at offset 0 + code1.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); + code1.extend_from_slice(&0u32.to_le_bytes()); // placeholder for 'bar' + code1.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); + + let m1 = BytecodeModule { + version: 0, + const_pool: vec![], + functions: vec![FunctionMeta { + code_offset: 0, + code_len: code1.len() as u32, + ..Default::default() + }], + code: code1, + debug_info: None, + exports: vec![Export { symbol: "foo".to_string(), func_idx: 0 }], + imports: vec![Import { symbol: "bar".to_string(), relocation_pcs: vec![0] }], + }; + + // Module 2: defines 'bar' + let mut code2 = Vec::new(); + code2.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); + + let m2 = BytecodeModule { + version: 0, + const_pool: vec![], + functions: vec![FunctionMeta { + code_offset: 0, + code_len: code2.len() as u32, + ..Default::default() + }], + code: code2, + debug_info: None, + exports: vec![Export { symbol: "bar".to_string(), func_idx: 0 }], + imports: vec![], + }; + + let result = Linker::link(&[m1, m2]).unwrap(); + + assert_eq!(result.functions.len(), 2); + // 'foo' is func 0, 'bar' is func 1 + assert_eq!(result.functions[0].code_offset, 0); + assert_eq!(result.functions[1].code_offset, 8); + + // Let's check patched code + let patched_func_id = u32::from_le_bytes(result.rom[2..6].try_into().unwrap()); + assert_eq!(patched_func_id, 1); // Points to 'bar' + } + + #[test] + fn test_linker_unresolved() { + let m1 = BytecodeModule { + version: 0, + const_pool: vec![], + functions: vec![], + code: vec![], + debug_info: None, + exports: vec![], + imports: vec![Import { symbol: "missing".to_string(), relocation_pcs: vec![] }], + }; + let result = Linker::link(&[m1]); + assert_eq!(result.unwrap_err(), LinkError::UnresolvedSymbol("missing".to_string())); + } + + #[test] + fn test_linker_duplicate_export() { + let m1 = BytecodeModule { + version: 0, + const_pool: vec![], + functions: vec![], + code: vec![], + debug_info: None, + exports: vec![Export { symbol: "dup".to_string(), func_idx: 0 }], + imports: vec![], + }; + let m2 = m1.clone(); + let result = Linker::link(&[m1, m2]); + assert_eq!(result.unwrap_err(), LinkError::DuplicateExport("dup".to_string())); + } + + #[test] + fn test_linker_const_relocation() { + // Module 1: uses constants + let mut code1 = Vec::new(); + code1.extend_from_slice(&(OpCode::PushConst as u16).to_le_bytes()); + code1.extend_from_slice(&0u32.to_le_bytes()); // Index 0 + code1.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); + + let m1 = BytecodeModule { + version: 0, + const_pool: vec![ConstantPoolEntry::Int32(42)], + functions: vec![FunctionMeta { code_offset: 0, code_len: code1.len() as u32, ..Default::default() }], + code: code1, + debug_info: None, + exports: vec![], + imports: vec![], + }; + + // Module 2: also uses constants + let mut code2 = Vec::new(); + code2.extend_from_slice(&(OpCode::PushConst as u16).to_le_bytes()); + code2.extend_from_slice(&0u32.to_le_bytes()); // Index 0 (local to module 2) + code2.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); + + let m2 = BytecodeModule { + version: 0, + const_pool: vec![ConstantPoolEntry::Int32(99)], + functions: vec![FunctionMeta { code_offset: 0, code_len: code2.len() as u32, ..Default::default() }], + code: code2, + debug_info: None, + exports: vec![], + imports: vec![], + }; + + let result = Linker::link(&[m1, m2]).unwrap(); + + assert_eq!(result.constant_pool.len(), 2); + assert_eq!(result.constant_pool[0], Value::Int32(42)); + assert_eq!(result.constant_pool[1], Value::Int32(99)); + + // Code for module 1 (starts at 0) + let idx1 = u32::from_le_bytes(result.rom[2..6].try_into().unwrap()); + assert_eq!(idx1, 0); + + // Code for module 2 (starts at 8) + let idx2 = u32::from_le_bytes(result.rom[10..14].try_into().unwrap()); + assert_eq!(idx2, 1); + } +} diff --git a/crates/prometeu-core/src/virtual_machine/local_addressing.rs b/crates/prometeu-core/src/virtual_machine/local_addressing.rs index d41545df..0a872c05 100644 --- a/crates/prometeu-core/src/virtual_machine/local_addressing.rs +++ b/crates/prometeu-core/src/virtual_machine/local_addressing.rs @@ -24,6 +24,7 @@ pub fn check_local_slot(meta: &FunctionMeta, slot: u32, opcode: u16, pc: u32) -> opcode, message: format!("Local slot {} out of bounds for function (limit {})", slot, limit), pc, + span: None, }) } } diff --git a/crates/prometeu-core/src/virtual_machine/mod.rs b/crates/prometeu-core/src/virtual_machine/mod.rs index 7a7e47e7..b849f2e9 100644 --- a/crates/prometeu-core/src/virtual_machine/mod.rs +++ b/crates/prometeu-core/src/virtual_machine/mod.rs @@ -7,14 +7,16 @@ pub mod local_addressing; pub mod opcode_spec; pub mod bytecode; pub mod verifier; +pub mod linker; use crate::hardware::HardwareBridge; -pub use program::Program; +pub use program::ProgramImage; pub use prometeu_bytecode::opcode::OpCode; pub use value::Value; pub use virtual_machine::{BudgetReport, LogicalFrameEndingReason, VirtualMachine}; pub use prometeu_bytecode::abi::TrapInfo; pub use verifier::VerifierError; +pub use linker::{Linker, LinkError}; pub type SyscallId = u32; @@ -30,6 +32,7 @@ pub enum VmInitError { UnsupportedFormat, PpbcParseFailed, PbsV0LoadFailed(prometeu_bytecode::v0::LoadError), + LinkFailed(LinkError), EntrypointNotFound, VerificationFailed(VerifierError), UnsupportedLegacyCallEncoding, diff --git a/crates/prometeu-core/src/virtual_machine/program.rs b/crates/prometeu-core/src/virtual_machine/program.rs index c0982d74..e55aeed5 100644 --- a/crates/prometeu-core/src/virtual_machine/program.rs +++ b/crates/prometeu-core/src/virtual_machine/program.rs @@ -1,16 +1,18 @@ use crate::virtual_machine::Value; -use prometeu_bytecode::v0::FunctionMeta; +use prometeu_bytecode::v0::{FunctionMeta, DebugInfo}; +use prometeu_bytecode::abi::TrapInfo; use std::sync::Arc; #[derive(Debug, Clone, Default)] -pub struct Program { +pub struct ProgramImage { pub rom: Arc<[u8]>, pub constant_pool: Arc<[Value]>, pub functions: Arc<[FunctionMeta]>, + pub debug_info: Option, } -impl Program { - pub fn new(rom: Vec, constant_pool: Vec, mut functions: Vec) -> Self { +impl ProgramImage { + pub fn new(rom: Vec, constant_pool: Vec, mut functions: Vec, debug_info: Option) -> Self { if functions.is_empty() && !rom.is_empty() { functions.push(FunctionMeta { code_offset: 0, @@ -22,6 +24,39 @@ impl Program { rom: Arc::from(rom), constant_pool: Arc::from(constant_pool), functions: Arc::from(functions), + debug_info, } } + + pub fn create_trap(&self, code: u32, opcode: u16, mut message: String, pc: u32) -> TrapInfo { + let span = self.debug_info.as_ref().and_then(|di| { + di.pc_to_span.iter().find(|(p, _)| *p == pc).map(|(_, s)| s.clone()) + }); + + if let Some(func_idx) = self.find_function_index(pc) { + if let Some(func_name) = self.get_function_name(func_idx) { + message = format!("{} (in function {})", message, func_name); + } + } + + TrapInfo { + code, + opcode, + message, + pc, + span, + } + } + + pub fn find_function_index(&self, pc: u32) -> Option { + self.functions.iter().position(|f| { + pc >= f.code_offset && pc < (f.code_offset + f.code_len) + }) + } + + pub fn get_function_name(&self, func_idx: usize) -> Option<&str> { + self.debug_info.as_ref() + .and_then(|di| di.function_names.iter().find(|(idx, _)| *idx as usize == func_idx)) + .map(|(_, name)| name.as_str()) + } } diff --git a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs index 050a3373..72784a7f 100644 --- a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs +++ b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs @@ -2,7 +2,7 @@ use crate::hardware::HardwareBridge; use crate::virtual_machine::call_frame::CallFrame; use crate::virtual_machine::scope_frame::ScopeFrame; use crate::virtual_machine::value::Value; -use crate::virtual_machine::{NativeInterface, Program, VmInitError}; +use crate::virtual_machine::{NativeInterface, ProgramImage, VmInitError}; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::pbc::{self, ConstantPoolEntry}; use prometeu_bytecode::abi::{TrapInfo, TRAP_OOB, TRAP_DIV_ZERO, TRAP_TYPE, TRAP_INVALID_FUNC, TRAP_BAD_RET_SLOTS}; @@ -28,6 +28,11 @@ pub enum LogicalFrameEndingReason { Panic(String), } +pub enum OpError { + Trap(u32, String), + Panic(String), +} + impl From for LogicalFrameEndingReason { fn from(info: TrapInfo) -> Self { LogicalFrameEndingReason::Trap(info) @@ -74,7 +79,7 @@ pub struct VirtualMachine { /// Global Variable Store: Variables that persist for the lifetime of the program. pub globals: Vec, /// The loaded executable (Bytecode + Constant Pool), that is the ROM translated. - pub program: Program, + pub program: ProgramImage, /// Heap Memory: Dynamic allocation pool. pub heap: Vec, /// Total virtual cycles consumed since the VM started. @@ -94,7 +99,7 @@ impl VirtualMachine { call_stack: Vec::new(), scope_stack: Vec::new(), globals: Vec::new(), - program: Program::new(rom, constant_pool, vec![]), + program: ProgramImage::new(rom, constant_pool, vec![], None), heap: Vec::new(), cycles: 0, halted: false, @@ -107,7 +112,7 @@ impl VirtualMachine { pub fn initialize(&mut self, program_bytes: Vec, entrypoint: &str) -> Result<(), VmInitError> { // Fail fast: reset state upfront. If we return early with an error, // the VM is left in a "halted and empty" state. - self.program = Program::default(); + self.program = ProgramImage::default(); self.pc = 0; self.operand_stack.clear(); self.call_stack.clear(); @@ -133,29 +138,35 @@ impl VirtualMachine { ConstantPoolEntry::String(v) => Value::String(v), ConstantPoolEntry::Null => Value::Null, }).collect(); - Program::new(pbc_file.rom, cp, vec![]) + ProgramImage::new(pbc_file.rom, cp, vec![], None) } else if program_bytes.starts_with(b"PBS\0") { // PBS v0 industrial format match prometeu_bytecode::v0::BytecodeLoader::load(&program_bytes) { - Ok(mut module) => { - // Run verifier - let max_stacks = crate::virtual_machine::verifier::Verifier::verify(&module.code, &module.functions) + Ok(module) => { + // Link module(s) + let mut linked_program = crate::virtual_machine::Linker::link(&[module]) + .map_err(VmInitError::LinkFailed)?; + + // Run verifier on the linked program + // Note: Verifier currently expects code and functions separately. + // We need to ensure it works with the linked program. + let max_stacks = crate::virtual_machine::verifier::Verifier::verify(&linked_program.rom, &linked_program.functions) .map_err(VmInitError::VerificationFailed)?; // Apply verified max_stack_slots - for (func, max_stack) in module.functions.iter_mut().zip(max_stacks) { + // Since linked_program.functions is an Arc<[FunctionMeta]>, we need to get a mutable copy if we want to update it. + // Or we update it before creating the ProgramImage. + + // Actually, let's look at how we can update max_stack_slots. + // ProgramImage holds Arc<[FunctionMeta]>. + + let mut functions = linked_program.functions.as_ref().to_vec(); + for (func, max_stack) in functions.iter_mut().zip(max_stacks) { func.max_stack_slots = max_stack; } + linked_program.functions = std::sync::Arc::from(functions); - let cp = module.const_pool.into_iter().map(|entry| match entry { - ConstantPoolEntry::Int32(v) => Value::Int32(v), - ConstantPoolEntry::Int64(v) => Value::Int64(v), - ConstantPoolEntry::Float64(v) => Value::Float(v), - ConstantPoolEntry::Boolean(v) => Value::Boolean(v), - ConstantPoolEntry::String(v) => Value::String(v), - ConstantPoolEntry::Null => Value::Null, - }).collect(); - Program::new(module.code, cp, module.functions) + linked_program } Err(prometeu_bytecode::v0::LoadError::InvalidVersion) => return Err(VmInitError::UnsupportedFormat), Err(e) => { @@ -386,12 +397,7 @@ impl VirtualMachine { } Value::Boolean(true) => {} _ => { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_TYPE, - opcode: opcode as u16, - message: format!("Expected boolean for JMP_IF_FALSE, got {:?}", val), - pc: start_pc as u32, - })); + return Err(self.trap(TRAP_TYPE, opcode as u16, format!("Expected boolean for JMP_IF_FALSE, got {:?}", val), start_pc as u32)); } } } @@ -405,12 +411,7 @@ impl VirtualMachine { } Value::Boolean(false) => {} _ => { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_TYPE, - opcode: opcode as u16, - message: format!("Expected boolean for JMP_IF_TRUE, got {:?}", val), - pc: start_pc as u32, - })); + return Err(self.trap(TRAP_TYPE, opcode as u16, format!("Expected boolean for JMP_IF_TRUE, got {:?}", val), start_pc as u32)); } } } @@ -433,12 +434,7 @@ impl VirtualMachine { OpCode::PushBounded => { let val = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); if val > 0xFFFF { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_OOB, - opcode: opcode as u16, - message: format!("Bounded value overflow: {} > 0xFFFF", val), - pc: start_pc as u32, - })); + return Err(self.trap(TRAP_OOB, opcode as u16, format!("Bounded value overflow: {} > 0xFFFF", val), start_pc as u32)); } self.push(Value::Bounded(val)); } @@ -469,7 +465,7 @@ impl VirtualMachine { self.push(a); self.push(b); } - OpCode::Add => self.binary_op(|a, b| match (&a, &b) { + OpCode::Add => self.binary_op(opcode, start_pc as u32, |a, b| match (&a, &b) { (Value::String(_), _) | (_, Value::String(_)) => { Ok(Value::String(format!("{}{}", a.to_string(), b.to_string()))) } @@ -485,19 +481,14 @@ impl VirtualMachine { (Value::Bounded(a), Value::Bounded(b)) => { let res = a.saturating_add(*b); if res > 0xFFFF { - Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_OOB, - opcode: OpCode::Add as u16, - message: format!("Bounded addition overflow: {} + {} = {}", a, b, res), - pc: start_pc as u32, - })) + Err(OpError::Trap(TRAP_OOB, format!("Bounded addition overflow: {} + {} = {}", a, b, res))) } else { Ok(Value::Bounded(res)) } } - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for ADD".into())), + _ => Err(OpError::Panic("Invalid types for ADD".into())), })?, - OpCode::Sub => self.binary_op(|a, b| match (a, b) { + OpCode::Sub => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_sub(b))), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_sub(b))), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64).wrapping_sub(b))), @@ -509,19 +500,14 @@ impl VirtualMachine { (Value::Float(a), Value::Int64(b)) => Ok(Value::Float(a - b as f64)), (Value::Bounded(a), Value::Bounded(b)) => { if a < b { - Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_OOB, - opcode: OpCode::Sub as u16, - message: format!("Bounded subtraction underflow: {} - {} < 0", a, b), - pc: start_pc as u32, - })) + Err(OpError::Trap(TRAP_OOB, format!("Bounded subtraction underflow: {} - {} < 0", a, b))) } else { Ok(Value::Bounded(a - b)) } } - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for SUB".into())), + _ => Err(OpError::Panic("Invalid types for SUB".into())), })?, - OpCode::Mul => self.binary_op(|a, b| match (a, b) { + OpCode::Mul => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_mul(b))), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_mul(b))), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64).wrapping_mul(b))), @@ -534,166 +520,96 @@ impl VirtualMachine { (Value::Bounded(a), Value::Bounded(b)) => { let res = a as u64 * b as u64; if res > 0xFFFF { - Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_OOB, - opcode: OpCode::Mul as u16, - message: format!("Bounded multiplication overflow: {} * {} = {}", a, b, res), - pc: start_pc as u32, - })) + Err(OpError::Trap(TRAP_OOB, format!("Bounded multiplication overflow: {} * {} = {}", a, b, res))) } else { Ok(Value::Bounded(res as u32)) } } - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for MUL".into())), + _ => Err(OpError::Panic("Invalid types for MUL".into())), })?, - OpCode::Div => self.binary_op(|a, b| match (a, b) { + OpCode::Div => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Integer division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer division by zero".into())); } Ok(Value::Int32(a / b)) } (Value::Int64(a), Value::Int64(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Integer division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer division by zero".into())); } Ok(Value::Int64(a / b)) } (Value::Int32(a), Value::Int64(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Integer division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer division by zero".into())); } Ok(Value::Int64(a as i64 / b)) } (Value::Int64(a), Value::Int32(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Integer division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer division by zero".into())); } Ok(Value::Int64(a / b as i64)) } (Value::Float(a), Value::Float(b)) => { if b == 0.0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Float division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Float division by zero".into())); } Ok(Value::Float(a / b)) } (Value::Int32(a), Value::Float(b)) => { if b == 0.0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Float division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Float division by zero".into())); } Ok(Value::Float(a as f64 / b)) } (Value::Float(a), Value::Int32(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Float division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Float division by zero".into())); } Ok(Value::Float(a / b as f64)) } (Value::Int64(a), Value::Float(b)) => { if b == 0.0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Float division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Float division by zero".into())); } Ok(Value::Float(a as f64 / b)) } (Value::Float(a), Value::Int64(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Float division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Float division by zero".into())); } Ok(Value::Float(a / b as f64)) } (Value::Bounded(a), Value::Bounded(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Div as u16, - message: "Bounded division by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Bounded division by zero".into())); } Ok(Value::Bounded(a / b)) } - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for DIV".into())), + _ => Err(OpError::Panic("Invalid types for DIV".into())), })?, - OpCode::Mod => self.binary_op(|a, b| match (a, b) { + OpCode::Mod => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Mod as u16, - message: "Integer modulo by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer modulo by zero".into())); } Ok(Value::Int32(a % b)) } (Value::Int64(a), Value::Int64(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Mod as u16, - message: "Integer modulo by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Integer modulo by zero".into())); } Ok(Value::Int64(a % b)) } (Value::Bounded(a), Value::Bounded(b)) => { if b == 0 { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_DIV_ZERO, - opcode: OpCode::Mod as u16, - message: "Bounded modulo by zero".into(), - pc: start_pc as u32, - })); + return Err(OpError::Trap(TRAP_DIV_ZERO, "Bounded modulo by zero".into())); } Ok(Value::Bounded(a % b)) } - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for MOD".into())), + _ => Err(OpError::Panic("Invalid types for MOD".into())), })?, OpCode::BoundToInt => { let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; @@ -707,44 +623,39 @@ impl VirtualMachine { let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let int_val = val.as_integer().ok_or_else(|| LogicalFrameEndingReason::Panic("Expected integer for INT_TO_BOUND_CHECKED".into()))?; if int_val < 0 || int_val > 0xFFFF { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_OOB, - opcode: OpCode::IntToBoundChecked as u16, - message: format!("Integer to bounded conversion out of range: {}", int_val), - pc: start_pc as u32, - })); + return Err(self.trap(TRAP_OOB, OpCode::IntToBoundChecked as u16, format!("Integer to bounded conversion out of range: {}", int_val), start_pc as u32)); } self.push(Value::Bounded(int_val as u32)); } - OpCode::Eq => self.binary_op(|a, b| Ok(Value::Boolean(a == b)))?, - OpCode::Neq => self.binary_op(|a, b| Ok(Value::Boolean(a != b)))?, - OpCode::Lt => self.binary_op(|a, b| { + OpCode::Eq => self.binary_op(opcode, start_pc as u32, |a, b| Ok(Value::Boolean(a == b)))?, + OpCode::Neq => self.binary_op(opcode, start_pc as u32, |a, b| Ok(Value::Boolean(a != b)))?, + OpCode::Lt => self.binary_op(opcode, start_pc as u32, |a, b| { a.partial_cmp(&b) .map(|o| Value::Boolean(o == std::cmp::Ordering::Less)) - .ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid types for LT".into())) + .ok_or_else(|| OpError::Panic("Invalid types for LT".into())) })?, - OpCode::Gt => self.binary_op(|a, b| { + OpCode::Gt => self.binary_op(opcode, start_pc as u32, |a, b| { a.partial_cmp(&b) .map(|o| Value::Boolean(o == std::cmp::Ordering::Greater)) - .ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid types for GT".into())) + .ok_or_else(|| OpError::Panic("Invalid types for GT".into())) })?, - OpCode::Lte => self.binary_op(|a, b| { + OpCode::Lte => self.binary_op(opcode, start_pc as u32, |a, b| { a.partial_cmp(&b) .map(|o| Value::Boolean(o != std::cmp::Ordering::Greater)) - .ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid types for LTE".into())) + .ok_or_else(|| OpError::Panic("Invalid types for LTE".into())) })?, - OpCode::Gte => self.binary_op(|a, b| { + OpCode::Gte => self.binary_op(opcode, start_pc as u32, |a, b| { a.partial_cmp(&b) .map(|o| Value::Boolean(o != std::cmp::Ordering::Less)) - .ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid types for GTE".into())) + .ok_or_else(|| OpError::Panic("Invalid types for GTE".into())) })?, - OpCode::And => self.binary_op(|a, b| match (a, b) { + OpCode::And => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Boolean(a), Value::Boolean(b)) => Ok(Value::Boolean(a && b)), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for AND".into())), + _ => Err(OpError::Panic("Invalid types for AND".into())), })?, - OpCode::Or => self.binary_op(|a, b| match (a, b) { + OpCode::Or => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Boolean(a), Value::Boolean(b)) => Ok(Value::Boolean(a || b)), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for OR".into())), + _ => Err(OpError::Panic("Invalid types for OR".into())), })?, OpCode::Not => { let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; @@ -754,40 +665,40 @@ impl VirtualMachine { return Err(LogicalFrameEndingReason::Panic("Invalid type for NOT".into())); } } - OpCode::BitAnd => self.binary_op(|a, b| match (a, b) { + OpCode::BitAnd => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a & b)), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a & b)), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64) & b)), (Value::Int64(a), Value::Int32(b)) => Ok(Value::Int64(a & (b as i64))), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for BitAnd".into())), + _ => Err(OpError::Panic("Invalid types for BitAnd".into())), })?, - OpCode::BitOr => self.binary_op(|a, b| match (a, b) { + OpCode::BitOr => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a | b)), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a | b)), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64) | b)), (Value::Int64(a), Value::Int32(b)) => Ok(Value::Int64(a | (b as i64))), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for BitOr".into())), + _ => Err(OpError::Panic("Invalid types for BitOr".into())), })?, - OpCode::BitXor => self.binary_op(|a, b| match (a, b) { + OpCode::BitXor => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a ^ b)), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a ^ b)), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64) ^ b)), (Value::Int64(a), Value::Int32(b)) => Ok(Value::Int64(a ^ (b as i64))), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for BitXor".into())), + _ => Err(OpError::Panic("Invalid types for BitXor".into())), })?, - OpCode::Shl => self.binary_op(|a, b| match (a, b) { + OpCode::Shl => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_shl(b as u32))), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_shl(b as u32))), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64).wrapping_shl(b as u32))), (Value::Int64(a), Value::Int32(b)) => Ok(Value::Int64(a.wrapping_shl(b as u32))), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for Shl".into())), + _ => Err(OpError::Panic("Invalid types for Shl".into())), })?, - OpCode::Shr => self.binary_op(|a, b| match (a, b) { + OpCode::Shr => self.binary_op(opcode, start_pc as u32, |a, b| match (a, b) { (Value::Int32(a), Value::Int32(b)) => Ok(Value::Int32(a.wrapping_shr(b as u32))), (Value::Int64(a), Value::Int64(b)) => Ok(Value::Int64(a.wrapping_shr(b as u32))), (Value::Int32(a), Value::Int64(b)) => Ok(Value::Int64((a as i64).wrapping_shr(b as u32))), (Value::Int64(a), Value::Int32(b)) => Ok(Value::Int64(a.wrapping_shr(b as u32))), - _ => Err(LogicalFrameEndingReason::Panic("Invalid types for Shr".into())), + _ => Err(OpError::Panic("Invalid types for Shr".into())), })?, OpCode::Neg => { let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; @@ -816,7 +727,8 @@ impl VirtualMachine { let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let func = &self.program.functions[frame.func_idx]; - crate::virtual_machine::local_addressing::check_local_slot(func, slot, opcode as u16, start_pc as u32)?; + crate::virtual_machine::local_addressing::check_local_slot(func, slot, opcode as u16, start_pc as u32) + .map_err(|trap_info| self.trap(trap_info.code, trap_info.opcode, trap_info.message, trap_info.pc))?; let stack_idx = crate::virtual_machine::local_addressing::local_index(frame, slot); let val = self.operand_stack.get(stack_idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Internal error: validated local slot not found in stack".into()))?; @@ -828,7 +740,8 @@ impl VirtualMachine { let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let func = &self.program.functions[frame.func_idx]; - crate::virtual_machine::local_addressing::check_local_slot(func, slot, opcode as u16, start_pc as u32)?; + crate::virtual_machine::local_addressing::check_local_slot(func, slot, opcode as u16, start_pc as u32) + .map_err(|trap_info| self.trap(trap_info.code, trap_info.opcode, trap_info.message, trap_info.pc))?; let stack_idx = crate::virtual_machine::local_addressing::local_index(frame, slot); self.operand_stack[stack_idx] = val; @@ -836,12 +749,7 @@ impl VirtualMachine { OpCode::Call => { let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let callee = self.program.functions.get(func_id).ok_or_else(|| { - LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_INVALID_FUNC, - opcode: opcode as u16, - message: format!("Invalid func_id {}", func_id), - pc: start_pc as u32, - }) + self.trap(TRAP_INVALID_FUNC, opcode as u16, format!("Invalid func_id {}", func_id), start_pc as u32) })?; if self.operand_stack.len() < callee.param_slots as usize { @@ -874,15 +782,10 @@ impl VirtualMachine { let expected_height = frame.stack_base + func.param_slots as usize + func.local_slots as usize + return_slots; if current_height != expected_height { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: TRAP_BAD_RET_SLOTS, - opcode: opcode as u16, - message: format!( + return Err(self.trap(TRAP_BAD_RET_SLOTS, opcode as u16, format!( "Incorrect stack height at RET in func {}: expected {} slots (stack_base={} + params={} + locals={} + returns={}), got {}", frame.func_idx, expected_height, frame.stack_base, func.param_slots, func.local_slots, return_slots, current_height - ), - pc: start_pc as u32, - })); + ), start_pc as u32)); } // Copy return values (preserving order: pop return_slots values, then reverse to push back) @@ -921,21 +824,11 @@ impl VirtualMachine { let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(base) = ref_val { let val = self.heap.get(base + offset).cloned().ok_or_else(|| { - LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_OOB, - opcode: OpCode::GateLoad as u16, - message: format!("Out-of-bounds heap access at offset {}", offset), - pc: start_pc as u32, - }) + self.trap(prometeu_bytecode::abi::TRAP_OOB, OpCode::GateLoad as u16, format!("Out-of-bounds heap access at offset {}", offset), start_pc as u32) })?; self.push(val); } else { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_TYPE, - opcode: OpCode::GateLoad as u16, - message: "Expected gate handle for GATE_LOAD".to_string(), - pc: start_pc as u32, - })); + return Err(self.trap(prometeu_bytecode::abi::TRAP_TYPE, OpCode::GateLoad as u16, "Expected gate handle for GATE_LOAD".to_string(), start_pc as u32)); } } OpCode::GateStore => { @@ -944,21 +837,11 @@ impl VirtualMachine { let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(base) = ref_val { if base + offset >= self.heap.len() { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_OOB, - opcode: OpCode::GateStore as u16, - message: format!("Out-of-bounds heap access at offset {}", offset), - pc: start_pc as u32, - })); + return Err(self.trap(prometeu_bytecode::abi::TRAP_OOB, OpCode::GateStore as u16, format!("Out-of-bounds heap access at offset {}", offset), start_pc as u32)); } self.heap[base + offset] = val; } else { - return Err(LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_TYPE, - opcode: OpCode::GateStore as u16, - message: "Expected gate handle for GATE_STORE".to_string(), - pc: start_pc as u32, - })); + return Err(self.trap(prometeu_bytecode::abi::TRAP_TYPE, OpCode::GateStore as u16, "Expected gate handle for GATE_STORE".to_string(), start_pc as u32)); } } OpCode::GateBeginPeek | OpCode::GateEndPeek | @@ -975,12 +858,7 @@ impl VirtualMachine { let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); let syscall = crate::hardware::syscalls::Syscall::from_u32(id).ok_or_else(|| { - LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_INVALID_SYSCALL, - opcode: OpCode::Syscall as u16, - message: format!("Unknown syscall: 0x{:08X}", id), - pc: pc_at_syscall, - }) + self.trap(prometeu_bytecode::abi::TRAP_INVALID_SYSCALL, OpCode::Syscall as u16, format!("Unknown syscall: 0x{:08X}", id), pc_at_syscall) })?; let args_count = syscall.args_count(); @@ -988,12 +866,7 @@ impl VirtualMachine { let mut args = Vec::with_capacity(args_count); for _ in 0..args_count { let v = self.pop().map_err(|_e| { - LogicalFrameEndingReason::Trap(TrapInfo { - code: prometeu_bytecode::abi::TRAP_STACK_UNDERFLOW, - opcode: OpCode::Syscall as u16, - message: "Syscall argument stack underflow".to_string(), - pc: pc_at_syscall, - }) + self.trap(prometeu_bytecode::abi::TRAP_STACK_UNDERFLOW, OpCode::Syscall as u16, "Syscall argument stack underflow".to_string(), pc_at_syscall) })?; args.push(v); } @@ -1002,12 +875,7 @@ impl VirtualMachine { let stack_height_before = self.operand_stack.len(); let mut ret = crate::virtual_machine::HostReturn::new(&mut self.operand_stack); native.syscall(id, &args, &mut ret, hw).map_err(|fault| match fault { - crate::virtual_machine::VmFault::Trap(code, msg) => LogicalFrameEndingReason::Trap(TrapInfo { - code, - opcode: OpCode::Syscall as u16, - message: msg, - pc: pc_at_syscall, - }), + crate::virtual_machine::VmFault::Trap(code, msg) => self.trap(code, OpCode::Syscall as u16, msg, pc_at_syscall), crate::virtual_machine::VmFault::Panic(msg) => LogicalFrameEndingReason::Panic(msg), })?; @@ -1030,6 +898,10 @@ impl VirtualMachine { Ok(()) } + pub fn trap(&self, code: u32, opcode: u16, message: String, pc: u32) -> LogicalFrameEndingReason { + LogicalFrameEndingReason::Trap(self.program.create_trap(code, opcode, message, pc)) + } + pub fn push(&mut self, val: Value) { self.operand_stack.push(val); } @@ -1055,15 +927,20 @@ impl VirtualMachine { self.operand_stack.last().ok_or("Stack underflow".into()) } - fn binary_op(&mut self, f: F) -> Result<(), LogicalFrameEndingReason> + fn binary_op(&mut self, opcode: OpCode, start_pc: u32, f: F) -> Result<(), LogicalFrameEndingReason> where - F: FnOnce(Value, Value) -> Result, + F: FnOnce(Value, Value) -> Result, { let b = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let a = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; - let res = f(a, b)?; - self.push(res); - Ok(()) + match f(a, b) { + Ok(res) => { + self.push(res); + Ok(()) + } + Err(OpError::Trap(code, msg)) => Err(self.trap(code, opcode as u16, msg, start_pc)), + Err(OpError::Panic(msg)) => Err(LogicalFrameEndingReason::Panic(msg)), + } } } @@ -1071,6 +948,7 @@ impl VirtualMachine { mod tests { use super::*; use prometeu_bytecode::v0::FunctionMeta; + use prometeu_bytecode::abi::SourceSpan; use crate::hardware::HardwareBridge; use crate::virtual_machine::{Value, HostReturn, VmFault, expect_int}; @@ -1334,7 +1212,7 @@ mod tests { ]; let mut vm = VirtualMachine { - program: Program::new(rom, vec![], functions), + program: ProgramImage::new(rom, vec![], functions, None), ..Default::default() }; vm.prepare_call("0"); @@ -1379,7 +1257,7 @@ mod tests { ]; let mut vm = VirtualMachine { - program: Program::new(rom, vec![], functions), + program: ProgramImage::new(rom, vec![], functions, None), ..Default::default() }; vm.prepare_call("0"); @@ -1418,7 +1296,7 @@ mod tests { ]; let mut vm2 = VirtualMachine { - program: Program::new(rom2, vec![], functions2), + program: ProgramImage::new(rom2, vec![], functions2, None), ..Default::default() }; vm2.prepare_call("0"); @@ -1527,7 +1405,7 @@ mod tests { ]; let mut vm = VirtualMachine { - program: Program::new(rom, vec![], functions), + program: ProgramImage::new(rom, vec![], functions, None), ..Default::default() }; vm.prepare_call("0"); @@ -2604,4 +2482,93 @@ mod tests { _ => panic!("Expected Trap, got {:?}", report.reason), } } + + #[test] + fn test_traceable_trap_with_span() { + let mut rom = Vec::new(); + // 0: PUSH_I32 10 (6 bytes) + // 6: PUSH_I32 0 (6 bytes) + // 12: DIV (2 bytes) + rom.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); + rom.extend_from_slice(&10i32.to_le_bytes()); + rom.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); + rom.extend_from_slice(&0i32.to_le_bytes()); + rom.extend_from_slice(&(OpCode::Div as u16).to_le_bytes()); + + let mut pc_to_span = Vec::new(); + pc_to_span.push((0, SourceSpan { file_id: 1, start: 10, end: 15 })); + pc_to_span.push((6, SourceSpan { file_id: 1, start: 16, end: 20 })); + pc_to_span.push((12, SourceSpan { file_id: 1, start: 21, end: 25 })); + + let debug_info = prometeu_bytecode::v0::DebugInfo { + pc_to_span, + function_names: vec![(0, "main".to_string())], + }; + + let program = ProgramImage::new(rom, vec![], vec![], Some(debug_info)); + let mut vm = VirtualMachine { + program, + ..Default::default() + }; + + let mut native = MockNative; + let mut hw = MockHardware; + + vm.prepare_call("0"); + let report = vm.run_budget(100, &mut native, &mut hw).unwrap(); + match report.reason { + LogicalFrameEndingReason::Trap(trap) => { + assert_eq!(trap.code, TRAP_DIV_ZERO); + assert_eq!(trap.pc, 12); + assert_eq!(trap.span, Some(SourceSpan { file_id: 1, start: 21, end: 25 })); + } + _ => panic!("Expected Trap, got {:?}", report.reason), + } + } + + #[test] + fn test_traceable_trap_with_function_name() { + let mut rom = Vec::new(); + // 0: PUSH_I32 10 (6 bytes) + // 6: PUSH_I32 0 (6 bytes) + // 12: DIV (2 bytes) + rom.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); + rom.extend_from_slice(&10i32.to_le_bytes()); + rom.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); + rom.extend_from_slice(&0i32.to_le_bytes()); + rom.extend_from_slice(&(OpCode::Div as u16).to_le_bytes()); + + let pc_to_span = vec![(12, SourceSpan { file_id: 1, start: 21, end: 25 })]; + let function_names = vec![(0, "math_utils::divide".to_string())]; + + let debug_info = prometeu_bytecode::v0::DebugInfo { + pc_to_span, + function_names, + }; + + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: rom.len() as u32, + ..Default::default() + }]; + + let program = ProgramImage::new(rom, vec![], functions, Some(debug_info)); + let mut vm = VirtualMachine { + program, + ..Default::default() + }; + + let mut native = MockNative; + let mut hw = MockHardware; + + vm.prepare_call("0"); + let report = vm.run_budget(100, &mut native, &mut hw).unwrap(); + match report.reason { + LogicalFrameEndingReason::Trap(trap) => { + assert_eq!(trap.code, TRAP_DIV_ZERO); + assert!(trap.message.contains("math_utils::divide")); + } + _ => panic!("Expected Trap, got {:?}", report.reason), + } + } } diff --git a/docs/specs/pbs/Runtime Traps.md b/docs/specs/pbs/Prometeu Runtime Traps.md similarity index 100% rename from docs/specs/pbs/Runtime Traps.md rename to docs/specs/pbs/Prometeu Runtime Traps.md diff --git a/docs/specs/pbs/files/PRs para Junie.md b/docs/specs/pbs/files/PRs para Junie.md index b74f4a57..1e9b3563 100644 --- a/docs/specs/pbs/files/PRs para Junie.md +++ b/docs/specs/pbs/files/PRs para Junie.md @@ -1,70 +1,3 @@ -## PR-10 — Program image + linker: imports/exports resolved before VM run - -**Why:** Imports are compile-time, but we need an industrial linking model for multi-module PBS. - -### Scope - -* Define in bytecode: - - * `exports`: symbol -> func_id/service entry (as needed) - * `imports`: symbol refs -> relocation slots -* Implement a **linker** that: - - * builds a `ProgramImage` from N modules - * resolves imports to exports - * produces a single final `FunctionTable` and code blob - -### Notes - -* VM **does not** do name lookup at runtime. -* Linking errors are deterministic: `LINK_UNRESOLVED_SYMBOL`, `LINK_DUP_EXPORT`, etc. - -### Tests - -* two-module link success -* unresolved import fails -* duplicate export fails - -### Acceptance - -* Multi-module PBS works; “import” is operationalized correctly. - ---- - -## PR-11 — Canonical integration cartridge + golden bytecode snapshots - -**Why:** One cartridge must be the unbreakable reference. - -### Scope - -* Create `CartridgeCanonical.pbs` that covers: - - * locals - * arithmetic - * if - * function call - * syscall clear - * input snapshot -* Add `golden` artifacts: - - * canonical AST JSON (frontend) - * IR Core (optional) - * IR VM / bytecode dump - * expected VM trace (optional) - -### Tests - -* CI runs cartridge and checks: - - * no traps - * deterministic output state - -### Acceptance - -* This cartridge is the “VM heartbeat test”. - ---- - ## PR-12 — VM test harness: stepper, trace, and property tests **Why:** Industrial quality means test tooling, not just “it runs”.