diff --git a/crates/prometeu-bytecode/src/decoder.rs b/crates/prometeu-bytecode/src/decoder.rs
new file mode 100644
index 00000000..142a78e7
--- /dev/null
+++ b/crates/prometeu-bytecode/src/decoder.rs
@@ -0,0 +1,255 @@
+//! Canonical bytecode decoder for Prometeu Bytecode (PBC).
+//!
+//! Single source of truth for instruction decoding used by compiler/linker/verifier/VM.
+//!
+//! Contract:
+//! - Instructions are encoded as: [opcode: u16 LE][immediate: spec.imm_bytes]
+//! - `decode_next(pc, bytes)` returns a typed `DecodedInstr` with canonical `next_pc`.
+//! - Immediate helpers validate sizes deterministically and return explicit errors.
+
+use crate::opcode::OpCode;
+use crate::opcode_spec::{OpCodeSpecExt, OpcodeSpec};
+
+/// Reasons an instruction could not be decoded, or its immediate could not be read.
+///
+/// Every variant carries the `pc` of the offending instruction within the decoded buffer.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DecodeError {
+    /// Fewer than two bytes are available at `pc`, so the opcode cannot be read.
+    TruncatedOpcode { pc: usize },
+    /// The `u16` read at `pc` is not a known [`OpCode`].
+    UnknownOpcode { pc: usize, opcode: u16 },
+    /// The opcode needs `need` immediate bytes but only `have` remain in the buffer.
+    TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
+    /// A typed accessor expected `expected` immediate bytes but the instruction has `actual`.
+    ImmediateSizeMismatch { pc: usize, opcode: OpCode, expected: usize, actual: usize },
+}
+
+/// One decoded instruction; the immediate is borrowed from the buffer it was decoded from.
+#[derive(Debug, Clone, Copy)]
+pub struct DecodedInstr<'a> {
+    /// Opcode read at `pc`.
+    pub opcode: OpCode,
+    /// Offset of this instruction's opcode within the decoded buffer.
+    pub pc: usize,
+    /// Offset of the first byte after this instruction (opcode plus immediate).
+    pub next_pc: usize,
+    /// Raw immediate bytes slice, guaranteed to have length `opcode.spec().imm_bytes`.
+    pub imm: &'a [u8],
+}
+
+impl<'a> DecodedInstr<'a> {
+    /// Copies the immediate into an `N`-byte array.
+    ///
+    /// Returns [`DecodeError::ImmediateSizeMismatch`] unless the immediate is exactly `N` bytes
+    /// long, so the typed accessors below never index or unwrap.
+    #[inline]
+    fn imm_array<const N: usize>(&self) -> Result<[u8; N], DecodeError> {
+        <[u8; N]>::try_from(self.imm).map_err(|_| DecodeError::ImmediateSizeMismatch {
+            pc: self.pc,
+            opcode: self.opcode,
+            expected: N,
+            actual: self.imm.len(),
+        })
+    }
+
+    /// Reads a 1-byte immediate.
+    #[inline]
+    pub fn imm_u8(&self) -> Result<u8, DecodeError> {
+        self.imm_array::<1>().map(|[byte]| byte)
+    }
+
+    /// Reads a 2-byte little-endian `u16` immediate.
+    #[inline]
+    pub fn imm_u16(&self) -> Result<u16, DecodeError> {
+        self.imm_array::<2>().map(u16::from_le_bytes)
+    }
+
+    /// Reads a 4-byte little-endian `u32` immediate.
+    #[inline]
+    pub fn imm_u32(&self) -> Result<u32, DecodeError> {
+        self.imm_array::<4>().map(u32::from_le_bytes)
+    }
+
+    /// Reads a 4-byte little-endian `i32` immediate.
+    #[inline]
+    pub fn imm_i32(&self) -> Result<i32, DecodeError> {
+        self.imm_array::<4>().map(i32::from_le_bytes)
+    }
+
+    /// Reads an 8-byte little-endian `i64` immediate.
+    #[inline]
+    pub fn imm_i64(&self) -> Result<i64, DecodeError> {
+        self.imm_array::<8>().map(i64::from_le_bytes)
+    }
+
+    /// Reads an 8-byte little-endian `f64` immediate.
+    #[inline]
+    pub fn imm_f64(&self) -> Result<f64, DecodeError> {
+        self.imm_array::<8>().map(f64::from_le_bytes)
+    }
+
+    /// Helper for opcodes carrying two u32 values packed in 8 bytes (e.g., ALLOC meta).
+    #[inline]
+    pub fn imm_u32x2(&self) -> Result<(u32, u32), DecodeError> {
+        let [a0, a1, a2, a3, b0, b1, b2, b3] = self.imm_array::<8>()?;
+        Ok((u32::from_le_bytes([a0, a1, a2, a3]), u32::from_le_bytes([b0, b1, b2, b3])))
+    }
+}
+
+/// Decodes the instruction at program counter `pc` from `bytes`.
+/// Returns the decoded instruction with canonical `next_pc`.
+///
+/// # Errors
+///
+/// - [`DecodeError::TruncatedOpcode`] if fewer than two bytes are available at `pc`,
+///   including any `pc` at or beyond the end of `bytes`.
+/// - [`DecodeError::UnknownOpcode`] if the opcode value does not convert to an [`OpCode`].
+/// - [`DecodeError::TruncatedImmediate`] if the immediate required by the opcode's spec
+///   extends past the end of `bytes`.
+#[inline]
+pub fn decode_next<'a>(pc: usize, bytes: &'a [u8]) -> Result<DecodedInstr<'a>, DecodeError> {
+    // Checked add: a plain `pc + 2` overflows for `pc` near `usize::MAX`, which panics in
+    // debug builds and wraps into an out-of-bounds index in release builds.
+    let imm_start = pc
+        .checked_add(2)
+        .filter(|&end| end <= bytes.len())
+        .ok_or(DecodeError::TruncatedOpcode { pc })?;
+
+    let opcode_val = u16::from_le_bytes([bytes[pc], bytes[pc + 1]]);
+    let opcode = OpCode::try_from(opcode_val)
+        .map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?;
+
+    let spec: OpcodeSpec = opcode.spec();
+    let need = spec.imm_bytes as usize;
+    // `imm_start <= bytes.len()` holds from the check above, so this cannot underflow.
+    let have = bytes.len() - imm_start;
+    if have < need {
+        return Err(DecodeError::TruncatedImmediate { pc, opcode, need, have });
+    }
+
+    // `need <= have` bounds `imm_end` by `bytes.len()`, so the addition cannot overflow.
+    let imm_end = imm_start + need;
+    Ok(DecodedInstr {
+        opcode,
+        pc,
+        next_pc: imm_end,
+        imm: &bytes[imm_start..imm_end],
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::asm::{assemble, Asm, Operand};
+    use crate::opcode::OpCode;
+
+    #[test]
+    fn decode_basic_no_imm() {
+        // Encode a NOP (0x0000)
+        let rom = assemble(&[Asm::Op(OpCode::Nop, vec![])]).unwrap();
+        let d = decode_next(0, &rom).unwrap();
+        assert_eq!(d.opcode, OpCode::Nop);
+        assert_eq!(d.pc, 0);
+        assert_eq!(d.next_pc, 2);
+        assert_eq!(d.imm.len(), 0);
+    }
+
+    #[test]
+    fn decode_with_u32_imm() {
+        // PUSH_CONST 0x11223344
+        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(0x11223344)])]).unwrap();
+        let d = decode_next(0, &rom).unwrap();
+        assert_eq!(d.opcode, OpCode::PushConst);
+        assert_eq!(d.imm_u32().unwrap(), 0x11223344);
+        assert_eq!(d.next_pc, 2 + 4);
+    }
+
+    #[test]
+    fn decode_with_u8_imm() {
+        // PUSH_BOOL true
+        let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap();
+        let d = decode_next(0, &rom).unwrap();
+        assert_eq!(d.opcode, OpCode::PushBool);
+        assert_eq!(d.imm.len(), 1);
+        assert_eq!(d.imm_u8().unwrap(), 1);
+        assert_eq!(d.next_pc, 2 + 1);
+    }
+
+    #[test]
+    fn decode_with_i64_and_f64() {
+        // PUSH_I64, PUSH_F64
+        let rom = assemble(&[
+            Asm::Op(OpCode::PushI64, vec![Operand::I64(-123)]),
+            Asm::Op(OpCode::PushF64, vec![Operand::F64(3.25)]),
+        ]).unwrap();
+
+        let d0 = decode_next(0, &rom).unwrap();
+        assert_eq!(d0.opcode, OpCode::PushI64);
+        assert_eq!(d0.imm_i64().unwrap(), -123);
+
+        let d1 = decode_next(d0.next_pc, &rom).unwrap();
+        assert_eq!(d1.opcode, OpCode::PushF64);
+        assert!((d1.imm_f64().unwrap() - 3.25).abs() < 1e-12);
+    }
+
+    #[test]
+    fn decode_truncated() {
+        let rom: Vec<u8> = vec![0x00, 0x00]; // NOP complete
+        assert!(matches!(decode_next(1, &rom), Err(DecodeError::TruncatedOpcode { .. })));
+    }
+
+    #[test]
+    fn decode_pc_out_of_range_is_an_error() {
+        let rom: Vec<u8> = vec![0x00, 0x00];
+        for pc in [rom.len(), rom.len() + 1, usize::MAX - 1, usize::MAX] {
+            assert_eq!(decode_next(pc, &rom).unwrap_err(), DecodeError::TruncatedOpcode { pc });
+        }
+    }
+
+    #[test]
+    fn decode_truncated_immediate() {
+        // PUSH_CONST needs 4 immediate bytes; expose only 2 of them.
+        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(7)])]).unwrap();
+        assert_eq!(
+            decode_next(0, &rom[..4]).unwrap_err(),
+            DecodeError::TruncatedImmediate { pc: 0, opcode: OpCode::PushConst, need: 4, have: 2 }
+        );
+    }
+
+    #[test]
+    fn imm_helpers_reject_wrong_width() {
+        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(7)])]).unwrap();
+        let d = decode_next(0, &rom).unwrap();
+        let mismatch = |expected| DecodeError::ImmediateSizeMismatch {
+            pc: 0,
+            opcode: OpCode::PushConst,
+            expected,
+            actual: 4,
+        };
+        assert_eq!(d.imm_u8(), Err(mismatch(1)));
+        assert_eq!(d.imm_u32x2(), Err(mismatch(8)));
+    }
+
+    #[test]
+    fn roundtrip_encode_decode_table() {
+        let rom = assemble(&[
+            Asm::Op(OpCode::Nop, vec![]),
+            Asm::Op(OpCode::PushConst, vec![Operand::U32(7)]),
+            Asm::Op(OpCode::Jmp, vec![Operand::U32(4)]),
+            Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]),
+            Asm::Op(OpCode::Halt, vec![]),
+        ]).unwrap();
+
+        let mut pc = 0usize;
+        let mut decoded = Vec::new();
+        while pc < rom.len() {
+            let d = decode_next(pc, &rom).unwrap();
+            decoded.push(d.opcode);
+            pc = d.next_pc;
+        }
+
+        assert_eq!(decoded, vec![OpCode::Nop, OpCode::PushConst, OpCode::Jmp, OpCode::PushI64, OpCode::Halt]);
+        assert_eq!(pc, rom.len());
+    }
+}
diff --git a/crates/prometeu-bytecode/src/lib.rs b/crates/prometeu-bytecode/src/lib.rs index ac6a5d7d..a44f1197 100644 --- a/crates/prometeu-bytecode/src/lib.rs +++ b/crates/prometeu-bytecode/src/lib.rs @@ -20,6 +20,7 @@ pub mod readwrite; pub mod asm; pub mod disasm; pub mod layout; +pub mod decoder; mod model; diff --git a/crates/prometeu-vm/src/bytecode/decoder.rs b/crates/prometeu-vm/src/bytecode/decoder.rs index 4532c226..9a45387d 100644 --- a/crates/prometeu-vm/src/bytecode/decoder.rs +++ b/crates/prometeu-vm/src/bytecode/decoder.rs @@ -1,47 +1,8 @@ -use prometeu_bytecode::opcode::OpCode; -use prometeu_bytecode::opcode_spec::{OpCodeSpecExt, OpcodeSpec}; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum DecodeError { - TruncatedOpcode { pc: usize }, - UnknownOpcode { pc: usize, opcode: u16 }, - TruncatedImmediate { pc: usize, 
opcode: OpCode, need: usize, have: usize }, -} - -#[derive(Debug, Clone)] -pub struct DecodedInstr<'a> { - pub opcode: OpCode, - pub spec: OpcodeSpec, - pub imm: &'a [u8], - pub next_pc: usize, -} +// Re-export canonical decoder from prometeu-bytecode to eliminate bespoke implementation in VM. +pub use prometeu_bytecode::decoder::{decode_next, DecodeError, DecodedInstr}; +/// Backwards-compatible shim for legacy call sites; delegates to canonical decoder. +#[inline] pub fn decode_at(rom: &[u8], pc: usize) -> Result, DecodeError> { - if pc + 2 > rom.len() { - return Err(DecodeError::TruncatedOpcode { pc }); - } - let opcode_val = u16::from_le_bytes([rom[pc], rom[pc+1]]); - let opcode = OpCode::try_from(opcode_val).map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?; - let spec = opcode.spec(); - - let imm_start = pc + 2; - let imm_end = imm_start + spec.imm_bytes as usize; - - if imm_end > rom.len() { - return Err(DecodeError::TruncatedImmediate { - pc, - opcode, - need: spec.imm_bytes as usize, - have: rom.len().saturating_sub(imm_start) - }); - } - - let imm = &rom[imm_start..imm_end]; - - Ok(DecodedInstr { - opcode, - spec, - imm, - next_pc: imm_end, - }) + decode_next(pc, rom) } diff --git a/crates/prometeu-vm/src/verifier.rs b/crates/prometeu-vm/src/verifier.rs index c7bf92ac..e017f0a9 100644 --- a/crates/prometeu-vm/src/verifier.rs +++ b/crates/prometeu-vm/src/verifier.rs @@ -1,6 +1,7 @@ use prometeu_abi::syscalls::Syscall; use crate::bytecode::decoder::{decode_at, DecodeError}; use prometeu_bytecode::opcode::OpCode; +use prometeu_bytecode::opcode_spec::OpCodeSpecExt; use prometeu_bytecode::FunctionMeta; use prometeu_bytecode::layout; use std::collections::{HashMap, HashSet, VecDeque}; @@ -68,6 +69,8 @@ impl Verifier { VerifierError::TruncatedOpcode { pc: func_start + pc }, DecodeError::TruncatedImmediate { pc: _, opcode, need, have } => VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have }, + 
DecodeError::ImmediateSizeMismatch { pc: _, opcode, expected, actual } => + VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need: expected, have: actual }, })?; pc = instr.next_pc; } @@ -87,16 +90,16 @@ impl Verifier { while let Some(pc) = worklist.pop_front() { let in_height = *stack_height_in.get(&pc).unwrap(); let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass - let spec = instr.spec; + let spec = instr.opcode.spec(); // Resolve dynamic pops/pushes let (pops, pushes) = match instr.opcode { OpCode::PopN => { - let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as u16; + let n = instr.imm_u32().unwrap() as u16; (n, 0) } OpCode::Call => { - let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let func_id = instr.imm_u32().unwrap(); let callee = all_functions.get(func_id as usize).ok_or_else(|| { VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id } })?; @@ -106,7 +109,7 @@ impl Verifier { (func.return_slots, 0) } OpCode::Syscall => { - let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let id = instr.imm_u32().unwrap(); let syscall = Syscall::from_u32(id).ok_or_else(|| { VerifierError::InvalidSyscallId { pc: func_start + pc, id } })?; @@ -131,7 +134,7 @@ impl Verifier { // Propagate to successors if spec.is_branch { // Canonical contract: branch immediate is RELATIVE to function start. 
- let target_rel = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let target_rel = instr.imm_u32().unwrap() as usize; let func_end_abs = layout::function_end_from_next(all_functions, func_idx, code.len()); let func_len = func_end_abs - func_start; diff --git a/crates/prometeu-vm/src/virtual_machine.rs b/crates/prometeu-vm/src/virtual_machine.rs index 045896ef..029302cb 100644 --- a/crates/prometeu-vm/src/virtual_machine.rs +++ b/crates/prometeu-vm/src/virtual_machine.rs @@ -379,12 +379,12 @@ impl VirtualMachine { self.halted = true; } OpCode::Jmp => { - let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let func_start = self.call_stack.last().map(|f| self.program.functions[f.func_idx].code_offset as usize).unwrap_or(0); self.pc = func_start + target; } OpCode::JmpIfFalse => { - let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; match val { Value::Boolean(false) => { @@ -398,7 +398,7 @@ impl VirtualMachine { } } OpCode::JmpIfTrue => { - let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; match val { Value::Boolean(true) => { @@ -415,38 +415,38 @@ impl VirtualMachine { // Handled in run_budget for interruption } OpCode::PushConst => { - let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? 
as usize; let val = self.program.constant_pool.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid constant index".into()))?; self.push(val); } OpCode::PushI64 => { - let val = i64::from_le_bytes(instr.imm[0..8].try_into().unwrap()); + let val = instr.imm_i64().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; self.push(Value::Int64(val)); } OpCode::PushI32 => { - let val = i32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let val = instr.imm_i32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; self.push(Value::Int32(val)); } OpCode::PushBounded => { - let val = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let val = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; if val > 0xFFFF { return Err(self.trap(TRAP_OOB, opcode as u16, format!("Bounded value overflow: {} > 0xFFFF", val), start_pc as u32)); } self.push(Value::Bounded(val)); } OpCode::PushF64 => { - let val = f64::from_le_bytes(instr.imm[0..8].try_into().unwrap()); + let val = instr.imm_f64().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; self.push(Value::Float(val)); } OpCode::PushBool => { - let val = instr.imm[0]; + let val = instr.imm_u8().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; self.push(Value::Boolean(val != 0)); } OpCode::Pop => { self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; } OpCode::PopN => { - let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let n = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; for _ in 0..n { self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; } @@ -706,12 +706,12 @@ impl VirtualMachine { } } OpCode::GetGlobal => { - let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? 
as usize; let val = self.globals.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid global index".into()))?; self.push(val); } OpCode::SetGlobal => { - let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if idx >= self.globals.len() { self.globals.resize(idx + 1, Value::Null); @@ -719,7 +719,7 @@ impl VirtualMachine { self.globals[idx] = val; } OpCode::GetLocal => { - let slot = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let slot = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let func = &self.program.functions[frame.func_idx]; @@ -731,7 +731,7 @@ impl VirtualMachine { self.push(val); } OpCode::SetLocal => { - let slot = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let slot = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let func = &self.program.functions[frame.func_idx]; @@ -743,7 +743,7 @@ impl VirtualMachine { self.operand_stack[stack_idx] = val; } OpCode::Call => { - let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let func_id = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let callee = self.program.functions.get(func_id).ok_or_else(|| { self.trap(TRAP_INVALID_FUNC, opcode as u16, format!("Invalid func_id {}", func_id), start_pc as u32) })?; @@ -808,8 +808,7 @@ impl VirtualMachine { } OpCode::Alloc => { // Allocate a new gate with given type and number of slots. 
- let type_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); - let slots_u32 = u32::from_le_bytes(instr.imm[4..8].try_into().unwrap()); + let (type_id, slots_u32) = instr.imm_u32x2().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; let slots = slots_u32 as usize; // Bump-allocate on the heap and zero-initialize with Null. @@ -832,7 +831,7 @@ impl VirtualMachine { self.push(Value::Gate(gate_id as usize)); } OpCode::GateLoad => { - let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let offset = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(gid_usize) = ref_val { let gid = gid_usize as GateId; @@ -855,7 +854,7 @@ impl VirtualMachine { } } OpCode::GateStore => { - let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let offset = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(gid_usize) = ref_val { @@ -886,8 +885,7 @@ impl VirtualMachine { } OpCode::Syscall => { let pc_at_syscall = start_pc as u32; - - let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let id = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; let syscall = prometeu_abi::syscalls::Syscall::from_u32(id).ok_or_else(|| { self.trap(prometeu_bytecode::abi::TRAP_INVALID_SYSCALL, OpCode::Syscall as u16, format!("Unknown syscall: 0x{:08X}", id), pc_at_syscall) })?;