2026-03-24 13:40:47 +00:00

134 lines
4.2 KiB
Rust

//! Canonical bytecode decoder for Prometeu Bytecode (PBX payload).
//!
//! Single source of truth for instruction decoding used by compiler/linker/verifier/VM.
//!
//! Contract:
//! - Instructions are encoded as: [opcode: u16 LE][immediate: spec.imm_bytes]
//! - `decode_next(pc, bytes)` returns a typed `DecodedInstr` with canonical `next_pc`.
//! - Immediate helpers validate sizes deterministically and return explicit errors.
use crate::isa::core::{CoreOpCode, CoreOpCodeSpecExt, CoreOpcodeSpec};
/// Errors reported while decoding an instruction or reading its immediate.
///
/// Every variant records the `pc` of the instruction involved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Fewer than two bytes were available at `pc`, so the `u16` opcode could not be read.
    TruncatedOpcode {
        /// Offset at which the opcode was expected.
        pc: usize,
    },
    /// The `u16` read at `pc` does not convert to a `CoreOpCode`.
    UnknownOpcode {
        /// Offset of the offending opcode.
        pc: usize,
        /// Raw opcode value as read from the byte stream.
        opcode: u16,
    },
    /// The opcode was recognised, but the buffer ends before its immediate is complete.
    TruncatedImmediate {
        /// Offset of the instruction.
        pc: usize,
        /// Opcode whose immediate is incomplete.
        opcode: CoreOpCode,
        /// Immediate size, in bytes, required by the opcode's spec.
        need: usize,
        /// Number of bytes actually available after the opcode.
        have: usize,
    },
    /// A typed immediate accessor was called on an immediate of a different size.
    ImmediateSizeMismatch {
        /// Offset of the instruction.
        pc: usize,
        /// Opcode of the instruction.
        opcode: CoreOpCode,
        /// Size, in bytes, the accessor requires.
        expected: usize,
        /// Size, in bytes, of the immediate that is present.
        actual: usize,
    },
}
/// A single decoded instruction that borrows its immediate bytes from the input buffer.
#[derive(Debug, Clone, Copy)]
pub struct DecodedInstr<'a> {
    /// Opcode of the instruction.
    pub opcode: CoreOpCode,
    /// Offset at which the instruction starts.
    pub pc: usize,
    /// Offset of the first byte after the instruction's immediate.
    pub next_pc: usize,
    /// Raw immediate bytes. When produced by `decode_next`, the slice length equals
    /// `opcode.spec().imm_bytes`.
    pub imm: &'a [u8],
}
impl<'a> DecodedInstr<'a> {
#[inline]
fn ensure_len(&self, expected: usize) -> Result<(), DecodeError> {
if self.imm.len() != expected {
return Err(DecodeError::ImmediateSizeMismatch {
pc: self.pc,
opcode: self.opcode,
expected,
actual: self.imm.len(),
});
}
Ok(())
}
#[inline]
pub fn imm_u8(&self) -> Result<u8, DecodeError> {
self.ensure_len(1)?;
Ok(self.imm[0])
}
#[inline]
pub fn imm_u16(&self) -> Result<u16, DecodeError> {
self.ensure_len(2)?;
Ok(u16::from_le_bytes(self.imm.try_into().unwrap()))
}
#[inline]
pub fn imm_u32(&self) -> Result<u32, DecodeError> {
self.ensure_len(4)?;
Ok(u32::from_le_bytes(self.imm.try_into().unwrap()))
}
#[inline]
pub fn imm_i32(&self) -> Result<i32, DecodeError> {
self.ensure_len(4)?;
Ok(i32::from_le_bytes(self.imm.try_into().unwrap()))
}
#[inline]
pub fn imm_i64(&self) -> Result<i64, DecodeError> {
self.ensure_len(8)?;
Ok(i64::from_le_bytes(self.imm.try_into().unwrap()))
}
#[inline]
pub fn imm_f64(&self) -> Result<f64, DecodeError> {
self.ensure_len(8)?;
Ok(f64::from_le_bytes(self.imm.try_into().unwrap()))
}
/// Helper for opcodes carrying two u32 values packed in 8 bytes (e.g., ALLOC meta).
#[inline]
pub fn imm_u32x2(&self) -> Result<(u32, u32), DecodeError> {
self.ensure_len(8)?;
let a = u32::from_le_bytes(self.imm[0..4].try_into().unwrap());
let b = u32::from_le_bytes(self.imm[4..8].try_into().unwrap());
Ok((a, b))
}
}
/// Decodes the instruction that starts at offset `pc` in `bytes`.
///
/// An instruction is a little-endian `u16` opcode followed by the number of immediate
/// bytes given by the opcode's spec (`imm_bytes`). On success the returned
/// [`DecodedInstr`] borrows the immediate from `bytes`, and its `next_pc` is the offset
/// of the first byte after that immediate.
///
/// # Errors
/// - [`DecodeError::TruncatedOpcode`] if two opcode bytes are not available at `pc`.
///   This includes a `pc` beyond the end of `bytes` and a `pc` so large that `pc + 2`
///   would overflow `usize`.
/// - [`DecodeError::UnknownOpcode`] if the opcode value does not convert to a
///   `CoreOpCode`.
/// - [`DecodeError::TruncatedImmediate`] if fewer immediate bytes remain than the
///   opcode's spec requires.
#[inline]
pub fn decode_next(pc: usize, bytes: &'_ [u8]) -> Result<DecodedInstr<'_>, DecodeError> {
    // `checked_add` + `get` keep an out-of-range or near-`usize::MAX` `pc` on the error
    // path; a plain `pc + 2` followed by indexing would overflow or panic instead.
    let opcode_val = match pc.checked_add(2).and_then(|end| bytes.get(pc..end)) {
        Some(&[lo, hi]) => u16::from_le_bytes([lo, hi]),
        _ => return Err(DecodeError::TruncatedOpcode { pc }),
    };

    let opcode = CoreOpCode::try_from(opcode_val)
        .map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?;
    let spec: CoreOpcodeSpec = opcode.spec();
    let need = spec.imm_bytes as usize;

    // Cannot overflow or go out of range: `bytes[pc..pc + 2]` was read just above.
    let imm_start = pc + 2;
    let rest = &bytes[imm_start..];
    if rest.len() < need {
        return Err(DecodeError::TruncatedImmediate { pc, opcode, need, have: rest.len() });
    }

    Ok(DecodedInstr { opcode, pc, next_pc: imm_start + need, imm: &rest[..need] })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An opcode value with no `CoreOpCode` mapping must produce `UnknownOpcode` carrying
    /// the instruction's `pc` and the raw opcode value.
    #[test]
    fn unknown_opcode_is_reported_deterministically() {
        // 0x0060 was previously a legacy opcode; now it must be unknown.
        let encoded = 0x0060u16.to_le_bytes().to_vec();
        let outcome = decode_next(0, &encoded);
        if let Err(DecodeError::UnknownOpcode { pc, opcode }) = outcome {
            assert_eq!(pc, 0);
            assert_eq!(opcode, 0x0060);
        } else {
            panic!("expected UnknownOpcode, got {:?}", outcome);
        }
    }
}