use crate::virtual_machine::bytecode::decoder::{decode_at, DecodeError}; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::FunctionMeta; use std::collections::{HashMap, HashSet, VecDeque}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum VerifierError { UnknownOpcode { pc: usize, opcode: u16 }, TruncatedOpcode { pc: usize }, TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize }, InvalidJumpTarget { pc: usize, target: usize }, JumpToMidInstruction { pc: usize, target: usize }, StackUnderflow { pc: usize, opcode: OpCode }, StackMismatchJoin { pc: usize, target: usize, height_in: u16, height_target: u16 }, BadRetStackHeight { pc: usize, height: u16, expected: u16 }, FunctionOutOfBounds { func_idx: usize, start: usize, end: usize, code_len: usize }, InvalidSyscallId { pc: usize, id: u32 }, TrailingBytes { func_idx: usize, at_pc: usize }, InvalidFuncId { pc: usize, id: u32 }, } pub struct Verifier; impl Verifier { pub fn verify(code: &[u8], functions: &[FunctionMeta]) -> Result, VerifierError> { let mut max_stacks = Vec::with_capacity(functions.len()); for (i, func) in functions.iter().enumerate() { max_stacks.push(Self::verify_function(code, func, i, functions)?); } Ok(max_stacks) } fn verify_function(code: &[u8], func: &FunctionMeta, func_idx: usize, all_functions: &[FunctionMeta]) -> Result { let func_start = func.code_offset as usize; let func_end = func_start + func.code_len as usize; if func_start > code.len() || func_end > code.len() || func_start > func_end { return Err(VerifierError::FunctionOutOfBounds { func_idx, start: func_start, end: func_end, code_len: code.len(), }); } let func_code = &code[func_start..func_end]; // First pass: find all valid instruction boundaries let mut valid_pc = HashSet::new(); let mut pc = 0; while pc < func_code.len() { valid_pc.insert(pc); let instr = decode_at(func_code, pc).map_err(|e| match e { DecodeError::UnknownOpcode { pc: _, opcode } => VerifierError::UnknownOpcode { pc: func_start + pc, opcode }, DecodeError::TruncatedOpcode { pc: _ } => VerifierError::TruncatedOpcode { pc: func_start + pc }, DecodeError::TruncatedImmediate { pc: _, opcode, need, have } => VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have }, })?; pc = instr.next_pc; } if pc != func_code.len() { return Err(VerifierError::TrailingBytes { func_idx, at_pc: func_start + pc }); } let mut stack_height_in: HashMap = HashMap::new(); let mut worklist = VecDeque::new(); let mut max_stack: u16 = 0; // Start from function entry stack_height_in.insert(0, 0); worklist.push_back(0); while let Some(pc) = worklist.pop_front() { let in_height = *stack_height_in.get(&pc).unwrap(); let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass let spec = instr.spec; // Resolve dynamic pops/pushes let (pops, pushes) = match instr.opcode { OpCode::PopN => { let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as u16; (n, 0) } OpCode::Call => { let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); let callee = all_functions.get(func_id as usize).ok_or_else(|| { VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id } })?; (callee.param_slots, callee.return_slots) } OpCode::Ret => { (func.return_slots, 0) } OpCode::Syscall => { let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); let syscall = crate::hardware::syscalls::Syscall::from_u32(id).ok_or_else(|| { VerifierError::InvalidSyscallId { pc: func_start + pc, id } })?; (syscall.args_count() as u16, syscall.results_count() as u16) } _ => (spec.pops, spec.pushes), }; if in_height < pops { return Err(VerifierError::StackUnderflow { pc: func_start + pc, opcode: instr.opcode }); } let out_height = in_height - pops + pushes; max_stack = max_stack.max(out_height); if instr.opcode == OpCode::Ret { if in_height != func.return_slots { return Err(VerifierError::BadRetStackHeight { pc: func_start + pc, height: in_height, expected: func.return_slots }); } } // Propagate to successors if spec.is_branch { let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; if target >= func.code_len as usize { return Err(VerifierError::InvalidJumpTarget { pc: func_start + pc, target: func_start + target }); } if !valid_pc.contains(&target) { return Err(VerifierError::JumpToMidInstruction { pc: func_start + pc, target: func_start + target }); } if let Some(&existing_height) = stack_height_in.get(&target) { if existing_height != out_height { return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + target, height_in: out_height, height_target: existing_height }); } } else { stack_height_in.insert(target, out_height); worklist.push_back(target); } } if !spec.is_terminator { let next_pc = instr.next_pc; if next_pc < func.code_len as usize { if let Some(&existing_height) = stack_height_in.get(&next_pc) { if existing_height != out_height { return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + next_pc, height_in: out_height, height_target: existing_height }); } } else { stack_height_in.insert(next_pc, out_height); worklist.push_back(next_pc); } } } } Ok(max_stack) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_verifier_underflow() { // OpCode::Add (2 bytes) let code = vec![OpCode::Add as u8, 0x00]; let functions = vec![FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Add })); } #[test] fn test_verifier_dup_underflow() { let code = vec![(OpCode::Dup as u16).to_le_bytes()[0], (OpCode::Dup as u16).to_le_bytes()[1]]; let functions = vec![FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Dup })); } #[test] fn test_verifier_invalid_jmp_target() { // Jmp (2 bytes) + 100u32 (4 bytes) let mut code = vec![OpCode::Jmp as u8, 0x00]; code.extend_from_slice(&100u32.to_le_bytes()); let functions = vec![FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::InvalidJumpTarget { pc: 0, target: 100 })); } #[test] fn test_verifier_jmp_to_mid_instr() { // PushI32 (2 bytes) + 42u32 (4 bytes) // Jmp 1 (middle of PushI32) let mut code = vec![OpCode::PushI32 as u8, 0x00]; code.extend_from_slice(&42u32.to_le_bytes()); code.push(OpCode::Jmp as u8); code.push(0x00); code.extend_from_slice(&1u32.to_le_bytes()); let functions = vec![FunctionMeta { code_offset: 0, code_len: 12, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::JumpToMidInstruction { pc: 6, target: 1 })); } #[test] fn test_verifier_truncation_opcode() { let code = vec![OpCode::PushI32 as u8]; // Truncated u16 opcode let functions = vec![FunctionMeta { code_offset: 0, code_len: 1, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::TruncatedOpcode { pc: 0 })); } #[test] fn test_verifier_truncation_immediate() { let mut code = vec![OpCode::PushI32 as u8, 0x00]; code.push(0x42); // Only 1 byte of 4-byte immediate let functions = vec![FunctionMeta { code_offset: 0, code_len: 3, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::TruncatedImmediate { pc: 0, opcode: OpCode::PushI32, need: 4, have: 1 })); } #[test] fn test_verifier_stack_mismatch_join() { // Let's make it reachable: // 0: PushBool true // 3: JmpIfTrue 15 // 9: Jmp 27 // 15: PushI32 1 // 21: Jmp 27 // 27: Nop let mut code = Vec::new(); code.push(OpCode::PushBool as u8); code.push(0x00); code.push(1); // 0: PushBool (3 bytes) code.push(OpCode::JmpIfTrue as u8); code.push(0x00); code.extend_from_slice(&15u32.to_le_bytes()); // 3: JmpIfTrue (6 bytes) code.push(OpCode::Jmp as u8); code.push(0x00); code.extend_from_slice(&27u32.to_le_bytes()); // 9: Jmp (6 bytes) code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&1u32.to_le_bytes()); // 15: PushI32 (6 bytes) code.push(OpCode::Jmp as u8); code.push(0x00); code.extend_from_slice(&27u32.to_le_bytes()); // 21: Jmp (6 bytes) code.push(OpCode::Nop as u8); code.push(0x00); // 27: Nop (2 bytes) let functions = vec![FunctionMeta { code_offset: 0, code_len: 29, ..Default::default() }]; let res = Verifier::verify(&code, &functions); // Path 0->3->9->27: height 1-1+0 = 0. // Path 0->3->15->21->27: height 1-1+1 = 1. // Mismatch at 27: 0 vs 1. assert_eq!(res, Err(VerifierError::StackMismatchJoin { pc: 21, target: 27, height_in: 1, height_target: 0 })); } #[test] fn test_verifier_bad_ret_height() { // PushI32 1 (6 bytes) // Ret (2 bytes) let mut code = vec![OpCode::PushI32 as u8, 0x00]; code.extend_from_slice(&1u32.to_le_bytes()); code.push(OpCode::Ret as u8); code.push(0x00); let functions = vec![FunctionMeta { code_offset: 0, code_len: 8, return_slots: 0, // Expected 0, but got 1 ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::BadRetStackHeight { pc: 6, height: 1, expected: 0 })); } #[test] fn test_verifier_max_stack() { // PushI32 1 // PushI32 2 // Add // Ret let mut code = Vec::new(); code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&1u32.to_le_bytes()); code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&2u32.to_le_bytes()); code.push(OpCode::Add as u8); code.push(0x00); code.push(OpCode::Ret as u8); code.push(0x00); let functions = vec![FunctionMeta { code_offset: 0, code_len: 16, return_slots: 1, ..Default::default() }]; let res = Verifier::verify(&code, &functions).unwrap(); assert_eq!(res[0], 2); } #[test] fn test_verifier_invalid_syscall_id() { let mut code = Vec::new(); code.push(OpCode::Syscall as u8); code.push(0x00); code.extend_from_slice(&0xDEADBEEFu32.to_le_bytes()); // Unknown ID let functions = vec![FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() }]; let res = Verifier::verify(&code, &functions); assert_eq!(res, Err(VerifierError::InvalidSyscallId { pc: 0, id: 0xDEADBEEF })); } }