This commit is contained in:
bQUARKz 2026-02-09 23:30:28 +00:00
parent 5941cd7248
commit 5b4b1ea58d
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
5 changed files with 238 additions and 70 deletions

View File

@ -0,0 +1,205 @@
//! Canonical bytecode decoder for Prometeu Bytecode (PBC).
//!
//! Single source of truth for instruction decoding used by compiler/linker/verifier/VM.
//!
//! Contract:
//! - Instructions are encoded as: [opcode: u16 LE][immediate: spec.imm_bytes]
//! - `decode_next(pc, bytes)` returns a typed `DecodedInstr` with canonical `next_pc`.
//! - Immediate helpers validate sizes deterministically and return explicit errors.
use crate::opcode::OpCode;
use crate::opcode_spec::{OpCodeSpecExt, OpcodeSpec};
/// Errors reported by the canonical decoder and by the typed immediate helpers.
///
/// Every variant records the `pc` of the instruction that could not be decoded or read.
/// The type implements `Display` and `std::error::Error`, so it can be propagated with `?`
/// into boxed or wrapped error types instead of being formatted with `{:?}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Fewer than two bytes are available at `pc`, so the `u16` opcode cannot be read.
    TruncatedOpcode { pc: usize },
    /// The two bytes at `pc` do not convert to an `OpCode`; `opcode` is the raw value read.
    UnknownOpcode { pc: usize, opcode: u16 },
    /// The opcode is known, but the buffer ends before its immediate does.
    /// `need` is the immediate size from the opcode spec, `have` the bytes left after the opcode.
    TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
    /// A typed immediate helper was asked for `expected` bytes but the immediate holds `actual`.
    ImmediateSizeMismatch { pc: usize, opcode: OpCode, expected: usize, actual: usize },
}

impl std::fmt::Display for DecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DecodeError::TruncatedOpcode { pc } => {
                write!(f, "truncated opcode at pc {}", pc)
            }
            DecodeError::UnknownOpcode { pc, opcode } => {
                write!(f, "unknown opcode {:#06x} at pc {}", opcode, pc)
            }
            DecodeError::TruncatedImmediate { pc, opcode, need, have } => write!(
                f,
                "truncated immediate for {:?} at pc {}: need {} bytes, have {}",
                opcode, pc, need, have
            ),
            DecodeError::ImmediateSizeMismatch { pc, opcode, expected, actual } => write!(
                f,
                "immediate size mismatch for {:?} at pc {}: expected {} bytes, actual {}",
                opcode, pc, expected, actual
            ),
        }
    }
}

impl std::error::Error for DecodeError {}
/// One decoded instruction, borrowing its immediate bytes from the buffer it was decoded from.
#[derive(Debug, Clone, Copy)]
pub struct DecodedInstr<'a> {
/// Opcode read from the two little-endian bytes at `pc`.
pub opcode: OpCode,
/// Offset in the decoded buffer at which this instruction starts.
pub pc: usize,
/// Offset of the first byte after this instruction's immediate.
pub next_pc: usize,
/// Raw immediate bytes slice. When produced by `decode_next` it has length
/// `opcode.spec().imm_bytes`; the fields are public, so hand-built values carry no such guarantee.
pub imm: &'a [u8],
}
impl<'a> DecodedInstr<'a> {
    /// Copies the immediate into an `N`-byte array.
    ///
    /// Returns `DecodeError::ImmediateSizeMismatch` (with `expected = N`) when the immediate
    /// is not exactly `N` bytes long. All typed helpers below go through this one check.
    #[inline]
    fn imm_array<const N: usize>(&self) -> Result<[u8; N], DecodeError> {
        match <[u8; N]>::try_from(self.imm) {
            Ok(raw) => Ok(raw),
            Err(_) => Err(DecodeError::ImmediateSizeMismatch {
                pc: self.pc,
                opcode: self.opcode,
                expected: N,
                actual: self.imm.len(),
            }),
        }
    }

    /// Reads a 1-byte immediate.
    #[inline]
    pub fn imm_u8(&self) -> Result<u8, DecodeError> {
        self.imm_array::<1>().map(|[byte]| byte)
    }

    /// Reads a 2-byte little-endian unsigned immediate.
    #[inline]
    pub fn imm_u16(&self) -> Result<u16, DecodeError> {
        self.imm_array().map(u16::from_le_bytes)
    }

    /// Reads a 4-byte little-endian unsigned immediate.
    #[inline]
    pub fn imm_u32(&self) -> Result<u32, DecodeError> {
        self.imm_array().map(u32::from_le_bytes)
    }

    /// Reads a 4-byte little-endian signed immediate.
    #[inline]
    pub fn imm_i32(&self) -> Result<i32, DecodeError> {
        self.imm_array().map(i32::from_le_bytes)
    }

    /// Reads an 8-byte little-endian signed immediate.
    #[inline]
    pub fn imm_i64(&self) -> Result<i64, DecodeError> {
        self.imm_array().map(i64::from_le_bytes)
    }

    /// Reads an 8-byte little-endian floating-point immediate.
    #[inline]
    pub fn imm_f64(&self) -> Result<f64, DecodeError> {
        self.imm_array().map(f64::from_le_bytes)
    }

    /// Helper for opcodes carrying two u32 values packed in 8 bytes (e.g., ALLOC meta).
    ///
    /// The first value occupies bytes `0..4`, the second bytes `4..8`, both little-endian.
    #[inline]
    pub fn imm_u32x2(&self) -> Result<(u32, u32), DecodeError> {
        let [a0, a1, a2, a3, b0, b1, b2, b3] = self.imm_array::<8>()?;
        let first = u32::from_le_bytes([a0, a1, a2, a3]);
        let second = u32::from_le_bytes([b0, b1, b2, b3]);
        Ok((first, second))
    }
}
/// Decodes the instruction at program counter `pc` from `bytes`.
/// Returns the decoded instruction with canonical `next_pc`.
///
/// The opcode is read as a little-endian `u16` at `pc`; the immediate is the
/// `opcode.spec().imm_bytes` bytes that follow it.
///
/// # Errors
/// - `DecodeError::TruncatedOpcode` when fewer than two bytes are available at `pc`.
///   This includes any `pc` at or beyond the end of `bytes`, and values so large that
///   `pc + 2` would overflow `usize`.
/// - `DecodeError::UnknownOpcode` when the raw value does not convert to an `OpCode`.
/// - `DecodeError::TruncatedImmediate` when the buffer ends before the immediate does.
///
/// This function never panics on an out-of-range `pc`.
#[inline]
pub fn decode_next<'a>(pc: usize, bytes: &'a [u8]) -> Result<DecodedInstr<'a>, DecodeError> {
    // `checked_add` + `get` keep an out-of-range or overflowing `pc` on the error path
    // instead of panicking on arithmetic overflow or on slice indexing.
    let opcode_val = match pc.checked_add(2).and_then(|end| bytes.get(pc..end)) {
        Some(raw) => u16::from_le_bytes([raw[0], raw[1]]),
        None => return Err(DecodeError::TruncatedOpcode { pc }),
    };
    let opcode = OpCode::try_from(opcode_val)
        .map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?;
    let spec: OpcodeSpec = opcode.spec();
    let need = spec.imm_bytes as usize;

    // The opcode read above proved `pc + 2 <= bytes.len()`, so neither the addition
    // nor the slice below can fail.
    let imm_start = pc + 2;
    let rest = &bytes[imm_start..];
    if rest.len() < need {
        return Err(DecodeError::TruncatedImmediate {
            pc,
            opcode,
            need,
            have: rest.len(),
        });
    }

    Ok(DecodedInstr {
        opcode,
        pc,
        next_pc: imm_start + need,
        imm: &rest[..need],
    })
}
#[cfg(test)]
mod tests {
    //! Unit tests for the canonical decoder: happy-path decoding of each immediate width,
    //! a multi-instruction walk, and the error paths of `decode_next` and the typed helpers.
    use super::*;
    use crate::asm::{assemble, Asm, Operand};
    use crate::opcode::OpCode;

    #[test]
    fn decode_basic_no_imm() {
        // Encode a NOP (0x0000)
        let rom = assemble(&[Asm::Op(OpCode::Nop, vec![])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::Nop);
        assert_eq!(d.pc, 0);
        assert_eq!(d.next_pc, 2);
        assert_eq!(d.imm.len(), 0);
    }

    #[test]
    fn decode_with_u32_imm() {
        // PUSH_CONST 0x11223344
        let rom = assemble(&[Asm::Op(OpCode::PushConst, vec![Operand::U32(0x11223344)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::PushConst);
        assert_eq!(d.imm_u32().unwrap(), 0x11223344);
        assert_eq!(d.next_pc, 2 + 4);
    }

    #[test]
    fn decode_with_u8_imm() {
        // PUSH_BOOL true
        let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(d.opcode, OpCode::PushBool);
        assert_eq!(d.imm.len(), 1);
        assert_eq!(d.imm_u8().unwrap(), 1);
        assert_eq!(d.next_pc, 2 + 1);
    }

    #[test]
    fn decode_with_i64_and_f64() {
        // PUSH_I64, PUSH_F64
        let rom = assemble(&[
            Asm::Op(OpCode::PushI64, vec![Operand::I64(-123)]),
            Asm::Op(OpCode::PushF64, vec![Operand::F64(3.25)]),
        ])
        .unwrap();
        let d0 = decode_next(0, &rom).unwrap();
        assert_eq!(d0.opcode, OpCode::PushI64);
        assert_eq!(d0.imm_i64().unwrap(), -123);
        let d1 = decode_next(d0.next_pc, &rom).unwrap();
        assert_eq!(d1.opcode, OpCode::PushF64);
        assert!((d1.imm_f64().unwrap() - 3.25).abs() < 1e-12);
    }

    #[test]
    fn decode_truncated() {
        // A complete NOP, but decoding starts at its second byte: only one byte is left.
        let rom: Vec<u8> = vec![0x00, 0x00];
        assert_eq!(decode_next(1, &rom).unwrap_err(), DecodeError::TruncatedOpcode { pc: 1 });
    }

    #[test]
    fn decode_truncated_immediate() {
        // PUSH_CONST opcode followed by only 2 of the 4 immediate bytes it requires.
        let mut rom = (OpCode::PushConst as u16).to_le_bytes().to_vec();
        rom.extend_from_slice(&[0x44, 0x33]);
        assert_eq!(
            decode_next(0, &rom).unwrap_err(),
            DecodeError::TruncatedImmediate { pc: 0, opcode: OpCode::PushConst, need: 4, have: 2 }
        );
    }

    #[test]
    fn imm_helper_rejects_wrong_width() {
        // PUSH_BOOL carries a 1-byte immediate, so asking for a u32 must fail explicitly.
        let rom = assemble(&[Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)])]).unwrap();
        let d = decode_next(0, &rom).unwrap();
        assert_eq!(
            d.imm_u32().unwrap_err(),
            DecodeError::ImmediateSizeMismatch { pc: 0, opcode: OpCode::PushBool, expected: 4, actual: 1 }
        );
    }

    #[test]
    fn roundtrip_encode_decode_table() {
        let rom = assemble(&[
            Asm::Op(OpCode::Nop, vec![]),
            Asm::Op(OpCode::PushConst, vec![Operand::U32(7)]),
            Asm::Op(OpCode::Jmp, vec![Operand::U32(4)]),
            Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]),
            Asm::Op(OpCode::Halt, vec![]),
        ])
        .unwrap();
        // Walk the stream using only `next_pc`; it must land exactly on the end of the ROM.
        let mut pc = 0usize;
        let mut decoded = Vec::new();
        while pc < rom.len() {
            let d = decode_next(pc, &rom).unwrap();
            decoded.push(d.opcode);
            pc = d.next_pc;
        }
        assert_eq!(
            decoded,
            vec![OpCode::Nop, OpCode::PushConst, OpCode::Jmp, OpCode::PushI64, OpCode::Halt]
        );
        assert_eq!(pc, rom.len());
    }
}

View File

@ -20,6 +20,7 @@ pub mod readwrite;
pub mod asm;
pub mod disasm;
pub mod layout;
pub mod decoder;
mod model;

View File

@ -1,47 +1,8 @@
use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::opcode_spec::{OpCodeSpecExt, OpcodeSpec};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
TruncatedOpcode { pc: usize },
UnknownOpcode { pc: usize, opcode: u16 },
TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
}
#[derive(Debug, Clone)]
pub struct DecodedInstr<'a> {
pub opcode: OpCode,
pub spec: OpcodeSpec,
pub imm: &'a [u8],
pub next_pc: usize,
}
// Re-export canonical decoder from prometeu-bytecode to eliminate bespoke implementation in VM.
pub use prometeu_bytecode::decoder::{decode_next, DecodeError, DecodedInstr};
/// Backwards-compatible shim for legacy call sites; delegates to the canonical decoder.
///
/// Takes its arguments as `(rom, pc)` and forwards them to `decode_next(pc, rom)`,
/// whose parameter order is the reverse. The result and error types are the ones
/// re-exported from `prometeu_bytecode::decoder`.
#[inline]
pub fn decode_at(rom: &[u8], pc: usize) -> Result<DecodedInstr<'_>, DecodeError> {
if pc + 2 > rom.len() {
return Err(DecodeError::TruncatedOpcode { pc });
}
let opcode_val = u16::from_le_bytes([rom[pc], rom[pc+1]]);
let opcode = OpCode::try_from(opcode_val).map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?;
let spec = opcode.spec();
let imm_start = pc + 2;
let imm_end = imm_start + spec.imm_bytes as usize;
if imm_end > rom.len() {
return Err(DecodeError::TruncatedImmediate {
pc,
opcode,
need: spec.imm_bytes as usize,
have: rom.len().saturating_sub(imm_start)
});
}
let imm = &rom[imm_start..imm_end];
Ok(DecodedInstr {
opcode,
spec,
imm,
next_pc: imm_end,
})
decode_next(pc, rom)
}

View File

@ -1,6 +1,7 @@
use prometeu_abi::syscalls::Syscall;
use crate::bytecode::decoder::{decode_at, DecodeError};
use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::opcode_spec::OpCodeSpecExt;
use prometeu_bytecode::FunctionMeta;
use prometeu_bytecode::layout;
use std::collections::{HashMap, HashSet, VecDeque};
@ -68,6 +69,8 @@ impl Verifier {
VerifierError::TruncatedOpcode { pc: func_start + pc },
DecodeError::TruncatedImmediate { pc: _, opcode, need, have } =>
VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have },
DecodeError::ImmediateSizeMismatch { pc: _, opcode, expected, actual } =>
VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need: expected, have: actual },
})?;
pc = instr.next_pc;
}
@ -87,16 +90,16 @@ impl Verifier {
while let Some(pc) = worklist.pop_front() {
let in_height = *stack_height_in.get(&pc).unwrap();
let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass
let spec = instr.spec;
let spec = instr.opcode.spec();
// Resolve dynamic pops/pushes
let (pops, pushes) = match instr.opcode {
OpCode::PopN => {
let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as u16;
let n = instr.imm_u32().unwrap() as u16;
(n, 0)
}
OpCode::Call => {
let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let func_id = instr.imm_u32().unwrap();
let callee = all_functions.get(func_id as usize).ok_or_else(|| {
VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id }
})?;
@ -106,7 +109,7 @@ impl Verifier {
(func.return_slots, 0)
}
OpCode::Syscall => {
let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let id = instr.imm_u32().unwrap();
let syscall = Syscall::from_u32(id).ok_or_else(|| {
VerifierError::InvalidSyscallId { pc: func_start + pc, id }
})?;
@ -131,7 +134,7 @@ impl Verifier {
// Propagate to successors
if spec.is_branch {
// Canonical contract: branch immediate is RELATIVE to function start.
let target_rel = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let target_rel = instr.imm_u32().unwrap() as usize;
let func_end_abs = layout::function_end_from_next(all_functions, func_idx, code.len());
let func_len = func_end_abs - func_start;

View File

@ -379,12 +379,12 @@ impl VirtualMachine {
self.halted = true;
}
OpCode::Jmp => {
let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let func_start = self.call_stack.last().map(|f| self.program.functions[f.func_idx].code_offset as usize).unwrap_or(0);
self.pc = func_start + target;
}
OpCode::JmpIfFalse => {
let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
match val {
Value::Boolean(false) => {
@ -398,7 +398,7 @@ impl VirtualMachine {
}
}
OpCode::JmpIfTrue => {
let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let target = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
match val {
Value::Boolean(true) => {
@ -415,38 +415,38 @@ impl VirtualMachine {
// Handled in run_budget for interruption
}
OpCode::PushConst => {
let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.program.constant_pool.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid constant index".into()))?;
self.push(val);
}
OpCode::PushI64 => {
let val = i64::from_le_bytes(instr.imm[0..8].try_into().unwrap());
let val = instr.imm_i64().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
self.push(Value::Int64(val));
}
OpCode::PushI32 => {
let val = i32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let val = instr.imm_i32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
self.push(Value::Int32(val));
}
OpCode::PushBounded => {
let val = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let val = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
if val > 0xFFFF {
return Err(self.trap(TRAP_OOB, opcode as u16, format!("Bounded value overflow: {} > 0xFFFF", val), start_pc as u32));
}
self.push(Value::Bounded(val));
}
OpCode::PushF64 => {
let val = f64::from_le_bytes(instr.imm[0..8].try_into().unwrap());
let val = instr.imm_f64().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
self.push(Value::Float(val));
}
OpCode::PushBool => {
let val = instr.imm[0];
let val = instr.imm_u8().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
self.push(Value::Boolean(val != 0));
}
OpCode::Pop => {
self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
}
OpCode::PopN => {
let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let n = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
for _ in 0..n {
self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
}
@ -706,12 +706,12 @@ impl VirtualMachine {
}
}
OpCode::GetGlobal => {
let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.globals.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid global index".into()))?;
self.push(val);
}
OpCode::SetGlobal => {
let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let idx = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
if idx >= self.globals.len() {
self.globals.resize(idx + 1, Value::Null);
@ -719,7 +719,7 @@ impl VirtualMachine {
self.globals[idx] = val;
}
OpCode::GetLocal => {
let slot = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let slot = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?;
let func = &self.program.functions[frame.func_idx];
@ -731,7 +731,7 @@ impl VirtualMachine {
self.push(val);
}
OpCode::SetLocal => {
let slot = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let slot = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?;
let func = &self.program.functions[frame.func_idx];
@ -743,7 +743,7 @@ impl VirtualMachine {
self.operand_stack[stack_idx] = val;
}
OpCode::Call => {
let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let func_id = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let callee = self.program.functions.get(func_id).ok_or_else(|| {
self.trap(TRAP_INVALID_FUNC, opcode as u16, format!("Invalid func_id {}", func_id), start_pc as u32)
})?;
@ -808,8 +808,7 @@ impl VirtualMachine {
}
OpCode::Alloc => {
// Allocate a new gate with given type and number of slots.
let type_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let slots_u32 = u32::from_le_bytes(instr.imm[4..8].try_into().unwrap());
let (type_id, slots_u32) = instr.imm_u32x2().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
let slots = slots_u32 as usize;
// Bump-allocate on the heap and zero-initialize with Null.
@ -832,7 +831,7 @@ impl VirtualMachine {
self.push(Value::Gate(gate_id as usize));
}
OpCode::GateLoad => {
let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let offset = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
if let Value::Gate(gid_usize) = ref_val {
let gid = gid_usize as GateId;
@ -855,7 +854,7 @@ impl VirtualMachine {
}
}
OpCode::GateStore => {
let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
let offset = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))? as usize;
let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?;
if let Value::Gate(gid_usize) = ref_val {
@ -886,8 +885,7 @@ impl VirtualMachine {
}
OpCode::Syscall => {
let pc_at_syscall = start_pc as u32;
let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let id = instr.imm_u32().map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
let syscall = prometeu_abi::syscalls::Syscall::from_u32(id).ok_or_else(|| {
self.trap(prometeu_bytecode::abi::TRAP_INVALID_SYSCALL, OpCode::Syscall as u16, format!("Unknown syscall: 0x{:08X}", id), pc_at_syscall)
})?;