//! Canonical bytecode decoder for Prometeu Bytecode (PBC). //! //! Single source of truth for instruction decoding used by compiler/linker/verifier/VM. //! //! Contract: //! - Instructions are encoded as: [opcode: u16 LE][immediate: spec.imm_bytes] //! - `decode_next(pc, bytes)` returns a typed `DecodedInstr` with canonical `next_pc`. //! - Immediate helpers validate sizes deterministically and return explicit errors. use crate::opcode::OpCode; use crate::opcode_spec::{OpCodeSpecExt, OpcodeSpec}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum DecodeError { TruncatedOpcode { pc: usize }, UnknownOpcode { pc: usize, opcode: u16 }, TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize }, ImmediateSizeMismatch { pc: usize, opcode: OpCode, expected: usize, actual: usize }, } #[derive(Debug, Clone, Copy)] pub struct DecodedInstr<'a> { pub opcode: OpCode, pub pc: usize, pub next_pc: usize, /// Raw immediate bytes slice, guaranteed to have length `opcode.spec().imm_bytes`. pub imm: &'a [u8], } impl<'a> DecodedInstr<'a> { #[inline] fn ensure_len(&self, expected: usize) -> Result<(), DecodeError> { if self.imm.len() != expected { return Err(DecodeError::ImmediateSizeMismatch { pc: self.pc, opcode: self.opcode, expected, actual: self.imm.len(), }); } Ok(()) } #[inline] pub fn imm_u8(&self) -> Result { self.ensure_len(1)?; Ok(self.imm[0]) } #[inline] pub fn imm_u16(&self) -> Result { self.ensure_len(2)?; Ok(u16::from_le_bytes(self.imm.try_into().unwrap())) } #[inline] pub fn imm_u32(&self) -> Result { self.ensure_len(4)?; Ok(u32::from_le_bytes(self.imm.try_into().unwrap())) } #[inline] pub fn imm_i32(&self) -> Result { self.ensure_len(4)?; Ok(i32::from_le_bytes(self.imm.try_into().unwrap())) } #[inline] pub fn imm_i64(&self) -> Result { self.ensure_len(8)?; Ok(i64::from_le_bytes(self.imm.try_into().unwrap())) } #[inline] pub fn imm_f64(&self) -> Result { self.ensure_len(8)?; Ok(f64::from_le_bytes(self.imm.try_into().unwrap())) } /// Helper for opcodes carrying two u32 values packed in 8 bytes (e.g., ALLOC meta). #[inline] pub fn imm_u32x2(&self) -> Result<(u32, u32), DecodeError> { self.ensure_len(8)?; let a = u32::from_le_bytes(self.imm[0..4].try_into().unwrap()); let b = u32::from_le_bytes(self.imm[4..8].try_into().unwrap()); Ok((a, b)) } } /// Decodes the instruction at program counter `pc` from `bytes`. /// Returns the decoded instruction with canonical `next_pc`. #[inline] pub fn decode_next<'a>(pc: usize, bytes: &'a [u8]) -> Result, DecodeError> { if pc + 2 > bytes.len() { return Err(DecodeError::TruncatedOpcode { pc }); } let opcode_val = u16::from_le_bytes([bytes[pc], bytes[pc + 1]]); let opcode = OpCode::try_from(opcode_val) .map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?; let spec: OpcodeSpec = opcode.spec(); let imm_start = pc + 2; let imm_end = imm_start + (spec.imm_bytes as usize); if imm_end > bytes.len() { return Err(DecodeError::TruncatedImmediate { pc, opcode, need: spec.imm_bytes as usize, have: bytes.len().saturating_sub(imm_start), }); } Ok(DecodedInstr { opcode, pc, next_pc: imm_end, imm: &bytes[imm_start..imm_end], }) } #[cfg(test)] mod tests { use super::*; use crate::asm::{assemble, Asm, Operand}; use crate::opcode::OpCode; #[test] fn decode_basic_no_imm() { // Encode a NOP (0x0000) let rom = assemble(&[Asm::Op(OpCode::Nop, vec![])]).unwrap(); let d = decode_next(0, &rom).unwrap(); assert_eq!(d.opcode, OpCode::Nop); assert_eq!(d.pc, 0); assert_eq!(d.next_pc, 2); assert_eq!(d.imm.len(), 0); } #[test] fn decode_with_u32_imm() { // PUSH_CONST 0x11223344 let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(0x11223344)])]).unwrap(); let d = decode_next(0, &rom).unwrap(); assert_eq!(d.opcode, OpCode::PushConst); assert_eq!(d.imm_u32().unwrap(), 0x11223344); assert_eq!(d.next_pc, 2 + 4); } #[test] fn decode_with_u8_imm() { // PUSH_BOOL true let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap(); let d = decode_next(0, &rom).unwrap(); assert_eq!(d.opcode, OpCode::PushBool); assert_eq!(d.imm.len(), 1); assert_eq!(d.imm_u8().unwrap(), 1); assert_eq!(d.next_pc, 2 + 1); } #[test] fn decode_with_i64_and_f64() { // PUSH_I64, PUSH_F64 let rom = assemble(&[ Asm::Op(OpCode::PushI64, vec![Operand::I64(-123)]), Asm::Op(OpCode::PushF64, vec![Operand::F64(3.25)]), ]).unwrap(); let d0 = decode_next(0, &rom).unwrap(); assert_eq!(d0.opcode, OpCode::PushI64); assert_eq!(d0.imm_i64().unwrap(), -123); let d1 = decode_next(d0.next_pc, &rom).unwrap(); assert_eq!(d1.opcode, OpCode::PushF64); assert!((d1.imm_f64().unwrap() - 3.25).abs() < 1e-12); } #[test] fn decode_truncated() { let rom: Vec = vec![0x00, 0x00]; // NOP complete assert!(matches!(decode_next(1, &rom), Err(DecodeError::TruncatedOpcode { .. }))); } #[test] fn roundtrip_encode_decode_table() { let rom = assemble(&[ Asm::Op(OpCode::Nop, vec![]), Asm::Op(OpCode::PushConst, vec![Operand::U32(7)]), Asm::Op(OpCode::Jmp, vec![Operand::U32(4)]), Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]), Asm::Op(OpCode::Halt, vec![]), ]).unwrap(); let mut pc = 0usize; let mut decoded = Vec::new(); while pc < rom.len() { let d = decode_next(pc, &rom).unwrap(); decoded.push(d.opcode); pc = d.next_pc; } assert_eq!(decoded, vec![OpCode::Nop, OpCode::PushConst, OpCode::Jmp, OpCode::PushI64, OpCode::Halt]); assert_eq!(pc, rom.len()); } }