//! Canonical bytecode decoder for Prometeu Bytecode (PBC).
//!
//! Single source of truth for instruction decoding used by the compiler, linker,
//! verifier and VM.
//!
//! Contract:
//! - Instructions are encoded as `[opcode: u16 LE][immediate: spec.imm_bytes]`.
//! - `decode_next(pc, bytes)` returns a typed `DecodedInstr` with canonical `next_pc`.
//! - Immediate helpers validate sizes deterministically and return explicit errors.
|
|
|
|
use crate::opcode::OpCode;
|
|
use crate::opcode_spec::{OpCodeSpecExt, OpcodeSpec};
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum DecodeError {
|
|
TruncatedOpcode { pc: usize },
|
|
UnknownOpcode { pc: usize, opcode: u16 },
|
|
TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
|
|
ImmediateSizeMismatch { pc: usize, opcode: OpCode, expected: usize, actual: usize },
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub struct DecodedInstr<'a> {
|
|
pub opcode: OpCode,
|
|
pub pc: usize,
|
|
pub next_pc: usize,
|
|
/// Raw immediate bytes slice, guaranteed to have length `opcode.spec().imm_bytes`.
|
|
pub imm: &'a [u8],
|
|
}
|
|
|
|
impl<'a> DecodedInstr<'a> {
|
|
#[inline]
|
|
fn ensure_len(&self, expected: usize) -> Result<(), DecodeError> {
|
|
if self.imm.len() != expected {
|
|
return Err(DecodeError::ImmediateSizeMismatch {
|
|
pc: self.pc,
|
|
opcode: self.opcode,
|
|
expected,
|
|
actual: self.imm.len(),
|
|
});
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_u8(&self) -> Result<u8, DecodeError> {
|
|
self.ensure_len(1)?;
|
|
Ok(self.imm[0])
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_u16(&self) -> Result<u16, DecodeError> {
|
|
self.ensure_len(2)?;
|
|
Ok(u16::from_le_bytes(self.imm.try_into().unwrap()))
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_u32(&self) -> Result<u32, DecodeError> {
|
|
self.ensure_len(4)?;
|
|
Ok(u32::from_le_bytes(self.imm.try_into().unwrap()))
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_i32(&self) -> Result<i32, DecodeError> {
|
|
self.ensure_len(4)?;
|
|
Ok(i32::from_le_bytes(self.imm.try_into().unwrap()))
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_i64(&self) -> Result<i64, DecodeError> {
|
|
self.ensure_len(8)?;
|
|
Ok(i64::from_le_bytes(self.imm.try_into().unwrap()))
|
|
}
|
|
|
|
#[inline]
|
|
pub fn imm_f64(&self) -> Result<f64, DecodeError> {
|
|
self.ensure_len(8)?;
|
|
Ok(f64::from_le_bytes(self.imm.try_into().unwrap()))
|
|
}
|
|
|
|
/// Helper for opcodes carrying two u32 values packed in 8 bytes (e.g., ALLOC meta).
|
|
#[inline]
|
|
pub fn imm_u32x2(&self) -> Result<(u32, u32), DecodeError> {
|
|
self.ensure_len(8)?;
|
|
let a = u32::from_le_bytes(self.imm[0..4].try_into().unwrap());
|
|
let b = u32::from_le_bytes(self.imm[4..8].try_into().unwrap());
|
|
Ok((a, b))
|
|
}
|
|
}
|
|
|
|
/// Decodes the instruction at program counter `pc` from `bytes`.
|
|
/// Returns the decoded instruction with canonical `next_pc`.
|
|
#[inline]
|
|
pub fn decode_next<'a>(pc: usize, bytes: &'a [u8]) -> Result<DecodedInstr<'a>, DecodeError> {
|
|
if pc + 2 > bytes.len() {
|
|
return Err(DecodeError::TruncatedOpcode { pc });
|
|
}
|
|
|
|
let opcode_val = u16::from_le_bytes([bytes[pc], bytes[pc + 1]]);
|
|
let opcode = OpCode::try_from(opcode_val)
|
|
.map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?;
|
|
|
|
let spec: OpcodeSpec = opcode.spec();
|
|
let imm_start = pc + 2;
|
|
let imm_end = imm_start + (spec.imm_bytes as usize);
|
|
|
|
if imm_end > bytes.len() {
|
|
return Err(DecodeError::TruncatedImmediate {
|
|
pc,
|
|
opcode,
|
|
need: spec.imm_bytes as usize,
|
|
have: bytes.len().saturating_sub(imm_start),
|
|
});
|
|
}
|
|
|
|
Ok(DecodedInstr {
|
|
opcode,
|
|
pc,
|
|
next_pc: imm_end,
|
|
imm: &bytes[imm_start..imm_end],
|
|
})
|
|
}
|
|
|
|
/// Unit tests for the decoder: happy-path decoding of each immediate width,
/// a multi-instruction round trip, and the explicit error paths.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::asm::{assemble, Asm, Operand};
    use crate::opcode::OpCode;

    #[test]
    fn decode_basic_no_imm() {
        // Encode a NOP (0x0000)
        let rom = assemble(&[Asm::Op(OpCode::Nop, vec![])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::Nop);
        assert_eq!(d.pc, 0);
        assert_eq!(d.next_pc, 2);
        assert_eq!(d.imm.len(), 0);
    }

    #[test]
    fn decode_with_u32_imm() {
        // PUSH_CONST 0x11223344
        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(0x11223344)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::PushConst);
        assert_eq!(d.imm_u32().unwrap(), 0x11223344);
        assert_eq!(d.next_pc, 2 + 4);
    }

    #[test]
    fn decode_with_u8_imm() {
        // PUSH_BOOL true
        let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::PushBool);
        assert_eq!(d.imm.len(), 1);
        assert_eq!(d.imm_u8().unwrap(), 1);
        assert_eq!(d.next_pc, 2 + 1);
    }

    #[test]
    fn decode_with_i64_and_f64() {
        // PUSH_I64, PUSH_F64
        let rom = assemble(&[
            Asm::Op(OpCode::PushI64, vec![Operand::I64(-123)]),
            Asm::Op(OpCode::PushF64, vec![Operand::F64(3.25)]),
        ])
        .unwrap();

        let d0 = decode_next(0, &rom).unwrap();
        assert_eq!(d0.opcode, OpCode::PushI64);
        assert_eq!(d0.imm_i64().unwrap(), -123);

        let d1 = decode_next(d0.next_pc, &rom).unwrap();
        assert_eq!(d1.opcode, OpCode::PushF64);
        assert!((d1.imm_f64().unwrap() - 3.25).abs() < 1e-12);
    }

    #[test]
    fn decode_truncated() {
        // A complete NOP, but decoding starts in the middle of it, so only one
        // byte is left for the opcode.
        let rom: Vec<u8> = vec![0x00, 0x00];
        assert!(matches!(decode_next(1, &rom), Err(DecodeError::TruncatedOpcode { .. })));
    }

    #[test]
    fn decode_pc_at_or_past_end() {
        // No bytes at all remain at `pc == len`, and `pc > len` is out of range;
        // both must be reported as a truncated opcode carrying the requested pc.
        let rom: Vec<u8> = vec![0x00, 0x00];
        assert_eq!(
            decode_next(rom.len(), &rom).unwrap_err(),
            DecodeError::TruncatedOpcode { pc: 2 }
        );
        assert_eq!(
            decode_next(rom.len() + 1, &rom).unwrap_err(),
            DecodeError::TruncatedOpcode { pc: 3 }
        );
    }

    #[test]
    fn decode_truncated_immediate() {
        // PUSH_CONST needs a 4-byte immediate; drop the last byte so only 3 remain.
        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(7)])]).unwrap();
        let short = &rom[..rom.len() - 1];
        assert_eq!(
            decode_next(0, short).unwrap_err(),
            DecodeError::TruncatedImmediate { pc: 0, opcode: OpCode::PushConst, need: 4, have: 3 }
        );
    }

    #[test]
    fn imm_accessor_size_mismatch() {
        // PUSH_BOOL carries a 1-byte immediate, so asking for a u32 must fail
        // with an explicit size mismatch rather than panic.
        let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(
            d.imm_u32().unwrap_err(),
            DecodeError::ImmediateSizeMismatch {
                pc: 0,
                opcode: OpCode::PushBool,
                expected: 4,
                actual: 1,
            }
        );
    }

    #[test]
    fn roundtrip_encode_decode_table() {
        let rom = assemble(&[
            Asm::Op(OpCode::Nop, vec![]),
            Asm::Op(OpCode::PushConst, vec![Operand::U32(7)]),
            Asm::Op(OpCode::Jmp, vec![Operand::U32(4)]),
            Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]),
            Asm::Op(OpCode::Halt, vec![]),
        ])
        .unwrap();

        // Walk the ROM using only `next_pc`; it must land exactly on the end.
        let mut pc = 0usize;
        let mut decoded = Vec::new();
        while pc < rom.len() {
            let d = decode_next(pc, &rom).unwrap();
            decoded.push(d.opcode);
            pc = d.next_pc;
        }

        assert_eq!(
            decoded,
            vec![OpCode::Nop, OpCode::PushConst, OpCode::Jmp, OpCode::PushI64, OpCode::Halt]
        );
        assert_eq!(pc, rom.len());
    }
}
|