bQUARKz f9120e740b
dev/pbs (#8)
Co-authored-by: Nilton Constantino <nilton.constantino@visma.com>
Reviewed-on: #8
2026-03-24 13:40:22 +00:00

330 lines
13 KiB
Rust

use crate::virtual_machine::bytecode::decoder::{decode_at, DecodeError};
use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::FunctionMeta;
use std::collections::{HashMap, HashSet, VecDeque};
/// Reasons the verifier rejects a program.
///
/// Every `pc`, `at_pc` and jump `target` carried by a variant is an absolute
/// offset into the whole code buffer: the verifier adds the function's start
/// offset to its function-relative position before reporting.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerifierError {
/// The decoder reported an unknown opcode value at `pc`.
UnknownOpcode { pc: usize, opcode: u16 },
/// The decoder reported a truncated opcode at `pc`.
TruncatedOpcode { pc: usize },
/// The decoder reported that the immediate of `opcode` needs `need` bytes
/// but only `have` are available.
TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
/// A branch at `pc` targets an offset at or beyond the end of its function.
InvalidJumpTarget { pc: usize, target: usize },
/// A branch at `pc` targets an offset that is not the start of an instruction.
JumpToMidInstruction { pc: usize, target: usize },
/// The instruction at `pc` pops more slots than the stack holds on entry to it.
StackUnderflow { pc: usize, opcode: OpCode },
/// Control flow from `pc` reaches `target` with stack height `height_in`,
/// but `target` was already recorded with the different height `height_target`.
StackMismatchJoin { pc: usize, target: usize, height_in: u16, height_target: u16 },
/// A `Ret` at `pc` executes with stack height `height` instead of the
/// function's declared `return_slots` (`expected`).
BadRetStackHeight { pc: usize, height: u16, expected: u16 },
/// The function's `[start, end)` byte range does not fit inside a code
/// buffer of `code_len` bytes.
FunctionOutOfBounds { func_idx: usize, start: usize, end: usize, code_len: usize },
/// A `Syscall` at `pc` carries an id that does not map to a known syscall.
InvalidSyscallId { pc: usize, id: u32 },
/// Linear decoding of function `func_idx` did not stop exactly at the end
/// of the function; `at_pc` is where decoding stopped.
TrailingBytes { func_idx: usize, at_pc: usize },
/// A `Call` at `pc` names a function index that is not in the function table.
InvalidFuncId { pc: usize, id: u32 },
}
pub struct Verifier;
impl Verifier {
pub fn verify(code: &[u8], functions: &[FunctionMeta]) -> Result<Vec<u16>, VerifierError> {
let mut max_stacks = Vec::with_capacity(functions.len());
for (i, func) in functions.iter().enumerate() {
max_stacks.push(Self::verify_function(code, func, i, functions)?);
}
Ok(max_stacks)
}
fn verify_function(code: &[u8], func: &FunctionMeta, func_idx: usize, all_functions: &[FunctionMeta]) -> Result<u16, VerifierError> {
let func_start = func.code_offset as usize;
let func_end = func_start + func.code_len as usize;
if func_start > code.len() || func_end > code.len() || func_start > func_end {
return Err(VerifierError::FunctionOutOfBounds {
func_idx,
start: func_start,
end: func_end,
code_len: code.len(),
});
}
let func_code = &code[func_start..func_end];
// First pass: find all valid instruction boundaries
let mut valid_pc = HashSet::new();
let mut pc = 0;
while pc < func_code.len() {
valid_pc.insert(pc);
let instr = decode_at(func_code, pc).map_err(|e| match e {
DecodeError::UnknownOpcode { pc: _, opcode } =>
VerifierError::UnknownOpcode { pc: func_start + pc, opcode },
DecodeError::TruncatedOpcode { pc: _ } =>
VerifierError::TruncatedOpcode { pc: func_start + pc },
DecodeError::TruncatedImmediate { pc: _, opcode, need, have } =>
VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have },
})?;
pc = instr.next_pc;
}
if pc != func_code.len() {
return Err(VerifierError::TrailingBytes { func_idx, at_pc: func_start + pc });
}
let mut stack_height_in: HashMap<usize, u16> = HashMap::new();
let mut worklist = VecDeque::new();
let mut max_stack: u16 = 0;
// Start from function entry
stack_height_in.insert(0, 0);
worklist.push_back(0);
while let Some(pc) = worklist.pop_front() {
let in_height = *stack_height_in.get(&pc).unwrap();
let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass
let spec = instr.spec;
// Resolve dynamic pops/pushes
let (pops, pushes) = match instr.opcode {
OpCode::PopN => {
let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as u16;
(n, 0)
}
OpCode::Call => {
let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let callee = all_functions.get(func_id as usize).ok_or_else(|| {
VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id }
})?;
(callee.param_slots, callee.return_slots)
}
OpCode::Ret => {
(func.return_slots, 0)
}
OpCode::Syscall => {
let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap());
let syscall = crate::hardware::syscalls::Syscall::from_u32(id).ok_or_else(|| {
VerifierError::InvalidSyscallId { pc: func_start + pc, id }
})?;
(syscall.args_count() as u16, syscall.results_count() as u16)
}
_ => (spec.pops, spec.pushes),
};
if in_height < pops {
return Err(VerifierError::StackUnderflow { pc: func_start + pc, opcode: instr.opcode });
}
let out_height = in_height - pops + pushes;
max_stack = max_stack.max(out_height);
if instr.opcode == OpCode::Ret {
if in_height != func.return_slots {
return Err(VerifierError::BadRetStackHeight { pc: func_start + pc, height: in_height, expected: func.return_slots });
}
}
// Propagate to successors
if spec.is_branch {
let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize;
if target >= func.code_len as usize {
return Err(VerifierError::InvalidJumpTarget { pc: func_start + pc, target: func_start + target });
}
if !valid_pc.contains(&target) {
return Err(VerifierError::JumpToMidInstruction { pc: func_start + pc, target: func_start + target });
}
if let Some(&existing_height) = stack_height_in.get(&target) {
if existing_height != out_height {
return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + target, height_in: out_height, height_target: existing_height });
}
} else {
stack_height_in.insert(target, out_height);
worklist.push_back(target);
}
}
if !spec.is_terminator {
let next_pc = instr.next_pc;
if next_pc < func.code_len as usize {
if let Some(&existing_height) = stack_height_in.get(&next_pc) {
if existing_height != out_height {
return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + next_pc, height_in: out_height, height_target: existing_height });
}
} else {
stack_height_in.insert(next_pc, out_height);
worklist.push_back(next_pc);
}
}
}
}
Ok(max_stack)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes an operand-less instruction: the opcode's low byte, then a zero byte.
    fn op(opcode: OpCode) -> [u8; 2] {
        [opcode as u8, 0x00]
    }

    /// Encodes an instruction followed by a 4-byte little-endian immediate.
    fn op_u32(opcode: OpCode, imm: u32) -> Vec<u8> {
        let mut bytes = op(opcode).to_vec();
        bytes.extend_from_slice(&imm.to_le_bytes());
        bytes
    }

    /// Runs the verifier over `code` with a function table holding only `meta`.
    fn verify_single(code: &[u8], meta: FunctionMeta) -> Result<Vec<u16>, VerifierError> {
        Verifier::verify(code, &[meta])
    }

    #[test]
    fn test_verifier_underflow() {
        // A lone Add (2 bytes) has nothing to pop.
        let outcome = verify_single(
            &op(OpCode::Add),
            FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Add }));
    }

    #[test]
    fn test_verifier_dup_underflow() {
        let program = (OpCode::Dup as u16).to_le_bytes();
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Dup }));
    }

    #[test]
    fn test_verifier_invalid_jmp_target() {
        // Jmp (2 bytes) + target 100 (4 bytes) in a 6-byte function.
        let program = op_u32(OpCode::Jmp, 100);
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::InvalidJumpTarget { pc: 0, target: 100 }));
    }

    #[test]
    fn test_verifier_jmp_to_mid_instr() {
        // 0: PushI32 42 (6 bytes)
        // 6: Jmp 1      (lands inside the PushI32)
        let program = [op_u32(OpCode::PushI32, 42), op_u32(OpCode::Jmp, 1)].concat();
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 12, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::JumpToMidInstruction { pc: 6, target: 1 }));
    }

    #[test]
    fn test_verifier_truncation_opcode() {
        // Only one byte of the two-byte opcode is present.
        let program = [OpCode::PushI32 as u8];
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 1, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::TruncatedOpcode { pc: 0 }));
    }

    #[test]
    fn test_verifier_truncation_immediate() {
        // PushI32 followed by only 1 byte of its 4-byte immediate.
        let mut program = op(OpCode::PushI32).to_vec();
        program.push(0x42);
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 3, ..Default::default() },
        );
        assert_eq!(
            outcome,
            Err(VerifierError::TruncatedImmediate {
                pc: 0,
                opcode: OpCode::PushI32,
                need: 4,
                have: 1,
            })
        );
    }

    #[test]
    fn test_verifier_stack_mismatch_join() {
        // Both arms of the conditional are reachable and meet at offset 27:
        //  0: PushBool true   (3 bytes)
        //  3: JmpIfTrue 15    (6 bytes)
        //  9: Jmp 27          (6 bytes)
        // 15: PushI32 1       (6 bytes)
        // 21: Jmp 27          (6 bytes)
        // 27: Nop             (2 bytes)
        let mut program: Vec<u8> = Vec::new();
        program.extend(op(OpCode::PushBool));
        program.push(1);
        program.extend(op_u32(OpCode::JmpIfTrue, 15));
        program.extend(op_u32(OpCode::Jmp, 27));
        program.extend(op_u32(OpCode::PushI32, 1));
        program.extend(op_u32(OpCode::Jmp, 27));
        program.extend(op(OpCode::Nop));

        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 29, ..Default::default() },
        );

        // Via 0 -> 3 -> 9 -> 27 the height is 1 - 1 + 0 = 0.
        // Via 0 -> 3 -> 15 -> 21 -> 27 the height is 1 - 1 + 1 = 1.
        // The second arrival at 27 disagrees with the first: 1 vs 0.
        assert_eq!(
            outcome,
            Err(VerifierError::StackMismatchJoin {
                pc: 21,
                target: 27,
                height_in: 1,
                height_target: 0,
            })
        );
    }

    #[test]
    fn test_verifier_bad_ret_height() {
        // 0: PushI32 1 (6 bytes)
        // 6: Ret       (2 bytes)
        let mut program = op_u32(OpCode::PushI32, 1);
        program.extend(op(OpCode::Ret));
        let outcome = verify_single(
            &program,
            FunctionMeta {
                code_offset: 0,
                code_len: 8,
                return_slots: 0, // Declares no return values, yet one slot is live at Ret.
                ..Default::default()
            },
        );
        assert_eq!(
            outcome,
            Err(VerifierError::BadRetStackHeight { pc: 6, height: 1, expected: 0 })
        );
    }

    #[test]
    fn test_verifier_max_stack() {
        // PushI32 1; PushI32 2; Add; Ret
        let mut program = op_u32(OpCode::PushI32, 1);
        program.extend(op_u32(OpCode::PushI32, 2));
        program.extend(op(OpCode::Add));
        program.extend(op(OpCode::Ret));
        let heights = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 16, return_slots: 1, ..Default::default() },
        )
        .unwrap();
        assert_eq!(heights[0], 2);
    }

    #[test]
    fn test_verifier_invalid_syscall_id() {
        // 0xDEADBEEF is not a known syscall id.
        let program = op_u32(OpCode::Syscall, 0xDEADBEEF);
        let outcome = verify_single(
            &program,
            FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() },
        );
        assert_eq!(outcome, Err(VerifierError::InvalidSyscallId { pc: 0, id: 0xDEADBEEF }));
    }
}