diff --git a/crates/prometeu-bytecode/src/abi.rs b/crates/prometeu-bytecode/src/abi.rs index 2cc87242..613161e1 100644 --- a/crates/prometeu-bytecode/src/abi.rs +++ b/crates/prometeu-bytecode/src/abi.rs @@ -62,10 +62,7 @@ pub struct TrapInfo { /// Checks if an instruction is a jump (branch) instruction. pub fn is_jump(opcode: OpCode) -> bool { - match opcode { - OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => true, - _ => false, - } + opcode.spec().is_branch } /// Checks if an instruction has any immediate operands in the instruction stream. diff --git a/crates/prometeu-bytecode/src/io.rs b/crates/prometeu-bytecode/src/io.rs new file mode 100644 index 00000000..def3b6b5 --- /dev/null +++ b/crates/prometeu-bytecode/src/io.rs @@ -0,0 +1,15 @@ +/// Reads a little-endian `u32` from the 4 bytes of `buf` starting at `pos`. +/// Returns `None` when that range overflows `usize` or extends past the end of `buf`. +pub fn read_u32_le(buf: &[u8], pos: usize) -> Option<u32> { + let b = buf.get(pos..pos.checked_add(4)?)?; + Some(u32::from_le_bytes([b[0], b[1], b[2], b[3]])) +} + +/// Writes `v` in little-endian order into the 4 bytes of `buf` starting at `pos`. +/// Returns `None`, leaving `buf` unmodified, when that range overflows `usize` or extends past the end of `buf`. +pub fn write_u32_le(buf: &mut [u8], pos: usize, v: u32) -> Option<()> { + let b = buf.get_mut(pos..pos.checked_add(4)?)?; + let le = v.to_le_bytes(); + b.copy_from_slice(&le); + Some(()) +} diff --git a/crates/prometeu-bytecode/src/layout.rs b/crates/prometeu-bytecode/src/layout.rs index 53e61e85..56cf6197 100644 --- a/crates/prometeu-bytecode/src/layout.rs +++ b/crates/prometeu-bytecode/src/layout.rs @@ -53,12 +53,3 @@ pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, p } None } - -/// Clamps an absolute jump target to the end (exclusive) of the enclosing -/// function identified by `func_idx`. 
-#[inline] -pub fn clamp_jump_target(functions: &[FunctionMeta], code_len_total: usize, func_idx: usize, target_abs: u32) -> u32 { - let start = functions.get(func_idx).map(|f| f.code_offset as usize).unwrap_or(0); - let end = function_end_from_next(functions, func_idx, code_len_total); - if (target_abs as usize) > end { end as u32 } else { target_abs } -} diff --git a/crates/prometeu-bytecode/src/lib.rs b/crates/prometeu-bytecode/src/lib.rs index a44f1197..cc4ede51 100644 --- a/crates/prometeu-bytecode/src/lib.rs +++ b/crates/prometeu-bytecode/src/lib.rs @@ -23,5 +23,6 @@ pub mod layout; pub mod decoder; mod model; +pub mod io; pub use model::*; diff --git a/crates/prometeu-compiler/src/building/linker.rs b/crates/prometeu-compiler/src/building/linker.rs index 2232fd59..2ea6c55e 100644 --- a/crates/prometeu-compiler/src/building/linker.rs +++ b/crates/prometeu-compiler/src/building/linker.rs @@ -2,14 +2,16 @@ use crate::building::output::CompiledModule; use crate::building::plan::BuildStep; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::layout; -use prometeu_bytecode::opcode_spec::OpCodeSpecExt; +use prometeu_bytecode::decoder::decode_next; use prometeu_bytecode::{ConstantPoolEntry, DebugInfo}; use std::collections::HashMap; use prometeu_abi::virtual_machine::{ProgramImage, Value}; use prometeu_analysis::ids::ProjectId; +use prometeu_bytecode::io::{read_u32_le, write_u32_le}; #[derive(Debug, PartialEq, Eq, Clone)] pub enum LinkError { + OutOfBounds(usize, usize), UnresolvedSymbol(String), DuplicateExport(String), IncompatibleSymbolSignature(String), @@ -18,6 +20,7 @@ pub enum LinkError { impl std::fmt::Display for LinkError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + LinkError::OutOfBounds(pos, len) => write!(f, "Out of bounds: pos={} len={}", pos, len), LinkError::UnresolvedSymbol(s) => write!(f, "Unresolved symbol: {}", s), LinkError::DuplicateExport(s) => write!(f, "Duplicate export: {}", s), 
LinkError::IncompatibleSymbolSignature(s) => write!(f, "Incompatible symbol signature: {}", s), @@ -64,6 +67,7 @@ impl ConstantPoolBitKey { } impl Linker { + pub fn link(modules: Vec, steps: Vec) -> Result { if modules.len() != steps.len() { return Err(LinkError::IncompatibleSymbolSignature(format!("Module count ({}) does not match build steps count ({})", modules.len(), steps.len()))); @@ -195,72 +199,75 @@ impl Linker { } } - // Internal call relocation (from module-local func_idx to global func_idx) - // And PUSH_CONST relocation. - // Also relocate intra-module jump target addresses when modules are concatenated. - - // Small helper to patch a 32-bit immediate at `pos` using a transformer function. - // Safety: caller must ensure `pos + 4 <= end`. - let mut patch_u32_at = |buf: &mut Vec, pos: usize, f: &dyn Fn(u32) -> u32| { - let current = u32::from_le_bytes(buf[pos..pos+4].try_into().unwrap()); - let next = f(current); - buf[pos..pos+4].copy_from_slice(&next.to_le_bytes()); - }; - let mut pos = code_offset; + let mut pc = code_offset; let end = code_offset + module.code.len(); - while pos < end { - if pos + 2 > end { break; } - let op_val = u16::from_le_bytes([combined_code[pos], combined_code[pos+1]]); - let opcode = match OpCode::try_from(op_val) { - Ok(op) => op, - Err(_) => { - pos += 2; - continue; + while pc < end { + // Scope the immutable borrow from decode_next so we can mutate combined_code afterwards + let (opcode, next_pc, imm_start, imm_u32_opt) = { + match decode_next(pc, &combined_code) { + Ok(instr) => { + let opcode = instr.opcode; + let next_pc = instr.next_pc; + let imm_start = instr.pc + 2; // start of immediate payload + let imm_u32_opt = match opcode { + OpCode::PushConst | OpCode::Call | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { + match instr.imm_u32() { + Ok(v) => Some(v), + Err(_) => None, + } + } + _ => None, + }; + (opcode, next_pc, imm_start, imm_u32_opt) + } + Err(e) => { + return 
Err(LinkError::IncompatibleSymbolSignature(format!( + "Bytecode decode error at pc {}: {:?}", + pc - code_offset, e + ))); + } } }; - pos += 2; - let imm_len = opcode.spec().imm_bytes as usize; match opcode { OpCode::PushConst => { - if pos + imm_len <= end && imm_len == 4 { - let local_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()) as usize; - if let Some(&global_idx) = local_to_global_const.get(local_idx) { - combined_code[pos..pos+4].copy_from_slice(&global_idx.to_le_bytes()); - } + let local_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( + "Invalid PUSH_CONST immediate at pc {}", + pc - code_offset + )))? as usize; + if let Some(&global_idx) = local_to_global_const.get(local_idx) { + patch_u32_at(&mut combined_code, imm_start, &|_| global_idx)?; } - pos += imm_len; } OpCode::Call => { - if pos + imm_len <= end && imm_len == 4 { - let local_func_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()); - - // Check if this PC was already patched by an import. - // If it wasn't, it's an internal call that needs relocation. - // `import.relocation_pcs` holds the PC at the start of the CALL immediate (after opcode), - // and here `pos` currently points exactly at that immediate. - let reloc_pc = (pos - code_offset) as u32; - let is_import = module.imports.iter().any(|imp| imp.relocation_pcs.contains(&reloc_pc)); - - if !is_import { - let global_func_idx = module_function_offsets[i] + local_func_idx; - combined_code[pos..pos+4].copy_from_slice(&global_func_idx.to_le_bytes()); - } + let local_func_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( + "Invalid CALL immediate at pc {}", + pc - code_offset + )))?; + // Determine if this CALL site corresponds to an import relocation. 
+ let reloc_rel_pc = (imm_start - code_offset) as u32; + let is_import = module + .imports + .iter() + .any(|imp| imp.relocation_pcs.contains(&reloc_rel_pc)); + if !is_import { + let global_func_idx = module_function_offsets[i] + local_func_idx; + patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx)?; } - pos += imm_len; } // Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { - if pos + imm_len <= end && imm_len == 4 { - patch_u32_at(&mut combined_code, pos, &|cur| cur + (code_offset as u32)); - } - pos += imm_len; - } - _ => { - // Generic advance using canonical immediate length. - pos += imm_len; + // For branches, the immediate must be present; it is rebased by this module's code offset + let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( + "Invalid branch immediate at pc {}", + pc - code_offset + )))?; + patch_u32_at(&mut combined_code, imm_start, &|cur| cur + (code_offset as u32))?; } + _ => {} } + + pc = next_pc; } } @@ -384,6 +391,18 @@ impl Linker { } } +/// Rewrites the little-endian u32 at `pos` in `buf` with `f` applied to its current value; fails with `LinkError::OutOfBounds(pos, buf.len())` when the 4 bytes do not fit. +fn patch_u32_at( + buf: &mut [u8], + pos: usize, + f: impl FnOnce(u32) -> u32, +) -> Result<(), LinkError> { + let current = prometeu_bytecode::io::read_u32_le(buf, pos).ok_or(LinkError::OutOfBounds(pos, buf.len()))?; + let next = f(current); + prometeu_bytecode::io::write_u32_le(buf, pos, next).ok_or(LinkError::OutOfBounds(pos, buf.len()))?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/prometeu-compiler/src/building/orchestrator.rs b/crates/prometeu-compiler/src/building/orchestrator.rs index 797f49c6..f682a4d7 100644 --- a/crates/prometeu-compiler/src/building/orchestrator.rs +++ b/crates/prometeu-compiler/src/building/orchestrator.rs @@ -248,7 +248,6 @@ mod tests { #[test] fn test_framesync_injected_end_to_end() { use prometeu_bytecode::opcode::OpCode; - use prometeu_bytecode::opcode_spec::OpCodeSpecExt; let dir = tempdir().unwrap(); let 
project_dir = dir.path().to_path_buf(); fs::create_dir_all(project_dir.join("src/main/modules")).unwrap(); @@ -280,22 +279,15 @@ mod tests { let end = (meta.code_offset + meta.code_len) as usize; let code = &res.image.rom[start..end]; - // Decode sequentially: each instruction is a u16 opcode (LE) followed by operands. - // We'll walk forward and record the sequence of opcodes, ignoring operands based on known sizes. - fn operand_size(op: u16) -> usize { - match OpCode::try_from(op) { - Ok(opc) => opc.spec().imm_bytes as usize, - Err(_) => 0, - } - } - + // Decode sequentially using the canonical decoder; record opcode stream. let mut pcs = Vec::new(); let mut i = 0usize; - while i + 1 < code.len() { - let op = u16::from_le_bytes([code[i], code[i + 1]]); - pcs.push(op); - i += 2 + operand_size(op); + while i < code.len() { + let instr = prometeu_bytecode::decoder::decode_next(i, code).expect("decoder should succeed"); + pcs.push(instr.opcode as u16); + i = instr.next_pc; } + assert_eq!(i, code.len(), "decoder must end exactly at function end"); assert!(pcs.len() >= 2); let last = *pcs.last().unwrap(); diff --git a/crates/prometeu-vm/src/bytecode/decoder.rs b/crates/prometeu-vm/src/bytecode/decoder.rs deleted file mode 100644 index 9a45387d..00000000 --- a/crates/prometeu-vm/src/bytecode/decoder.rs +++ /dev/null @@ -1,8 +0,0 @@ -// Re-export canonical decoder from prometeu-bytecode to eliminate bespoke implementation in VM. -pub use prometeu_bytecode::decoder::{decode_next, DecodeError, DecodedInstr}; - -/// Backwards-compatible shim for legacy call sites; delegates to canonical decoder. 
-#[inline] -pub fn decode_at(rom: &[u8], pc: usize) -> Result, DecodeError> { - decode_next(pc, rom) -} diff --git a/crates/prometeu-vm/src/bytecode/mod.rs b/crates/prometeu-vm/src/bytecode/mod.rs deleted file mode 100644 index 56812db3..00000000 --- a/crates/prometeu-vm/src/bytecode/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod decoder; diff --git a/crates/prometeu-vm/src/lib.rs b/crates/prometeu-vm/src/lib.rs index e6b9562d..7f2d292c 100644 --- a/crates/prometeu-vm/src/lib.rs +++ b/crates/prometeu-vm/src/lib.rs @@ -2,8 +2,6 @@ mod virtual_machine; mod call_frame; mod scope_frame; pub mod local_addressing; -pub mod opcode_spec; -pub mod bytecode; pub mod verifier; pub use prometeu_abi::virtual_machine::program::ProgramImage; diff --git a/crates/prometeu-vm/src/opcode_spec.rs b/crates/prometeu-vm/src/opcode_spec.rs deleted file mode 100644 index a15ca29b..00000000 --- a/crates/prometeu-vm/src/opcode_spec.rs +++ /dev/null @@ -1,3 +0,0 @@ -// Canonical `OpcodeSpec` now lives in `prometeu-bytecode`. -// Keep this module as a thin re-export for compatibility. 
-pub use prometeu_bytecode::opcode_spec::*; diff --git a/crates/prometeu-vm/src/verifier.rs b/crates/prometeu-vm/src/verifier.rs index e017f0a9..dc74f30c 100644 --- a/crates/prometeu-vm/src/verifier.rs +++ b/crates/prometeu-vm/src/verifier.rs @@ -1,5 +1,5 @@ use prometeu_abi::syscalls::Syscall; -use crate::bytecode::decoder::{decode_at, DecodeError}; +use prometeu_bytecode::decoder::{decode_next, DecodeError}; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::opcode_spec::OpCodeSpecExt; use prometeu_bytecode::FunctionMeta; @@ -62,7 +62,7 @@ impl Verifier { let mut pc = 0; while pc < func_code.len() { valid_pc.insert(pc); - let instr = decode_at(func_code, pc).map_err(|e| match e { + let instr = decode_next(pc, func_code).map_err(|e| match e { DecodeError::UnknownOpcode { pc: _, opcode } => VerifierError::UnknownOpcode { pc: func_start + pc, opcode }, DecodeError::TruncatedOpcode { pc: _ } => @@ -89,7 +89,7 @@ impl Verifier { while let Some(pc) = worklist.pop_front() { let in_height = *stack_height_in.get(&pc).unwrap(); - let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass + let instr = decode_next(pc, func_code).unwrap(); // Guaranteed to succeed due to first pass let spec = instr.opcode.spec(); // Resolve dynamic pops/pushes diff --git a/crates/prometeu-vm/src/virtual_machine.rs b/crates/prometeu-vm/src/virtual_machine.rs index 029302cb..8958f05d 100644 --- a/crates/prometeu-vm/src/virtual_machine.rs +++ b/crates/prometeu-vm/src/virtual_machine.rs @@ -366,7 +366,7 @@ impl VirtualMachine { let start_pc = self.pc; // Fetch & Decode - let instr = crate::bytecode::decoder::decode_at(&self.program.rom, self.pc) + let instr = prometeu_bytecode::decoder::decode_next(self.pc, &self.program.rom) .map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; let opcode = instr.opcode; diff --git a/test-cartridges/canonical/golden/program.pbc b/test-cartridges/canonical/golden/program.pbc index 5a70490d..3646ac85 100644 
Binary files a/test-cartridges/canonical/golden/program.pbc and b/test-cartridges/canonical/golden/program.pbc differ