449 lines
17 KiB
Rust
449 lines
17 KiB
Rust
use prometeu_bytecode::FunctionMeta;
|
|
use prometeu_bytecode::isa::core::CoreOpCode as OpCode;
|
|
use prometeu_bytecode::isa::core::CoreOpCodeSpecExt as OpCodeSpecExt;
|
|
use prometeu_bytecode::{DecodeError, decode_next};
|
|
use prometeu_bytecode::{FunctionLayout, compute_function_layouts};
|
|
use prometeu_hal::syscalls::Syscall;
|
|
use std::collections::{HashMap, HashSet, VecDeque};
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum VerifierError {
|
|
UnknownOpcode { pc: usize, opcode: u16 },
|
|
TruncatedOpcode { pc: usize },
|
|
TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize },
|
|
InvalidJumpTarget { pc: usize, target: usize },
|
|
JumpToMidInstruction { pc: usize, target: usize },
|
|
StackUnderflow { pc: usize, opcode: OpCode },
|
|
StackMismatchJoin { pc: usize, target: usize, height_in: u16, height_target: u16 },
|
|
BadRetStackHeight { pc: usize, height: u16, expected: u16 },
|
|
FunctionOutOfBounds { func_idx: usize, start: usize, end: usize, code_len: usize },
|
|
InvalidSyscallId { pc: usize, id: u32 },
|
|
TrailingBytes { func_idx: usize, at_pc: usize },
|
|
InvalidFuncId { pc: usize, id: u32 },
|
|
}
|
|
|
|
/// Stateless namespace for bytecode verification; all functionality is exposed through
/// associated functions such as `Verifier::verify`.
pub struct Verifier;
|
|
|
|
impl Verifier {
|
|
pub fn verify(code: &[u8], functions: &[FunctionMeta]) -> Result<Vec<u16>, VerifierError> {
|
|
let mut max_stacks = Vec::with_capacity(functions.len());
|
|
// Precompute function [start, end) ranges once for O(1) lookups
|
|
let layouts = compute_function_layouts(functions, code.len());
|
|
for (i, func) in functions.iter().enumerate() {
|
|
max_stacks.push(Self::verify_function(code, func, i, functions, &layouts)?);
|
|
}
|
|
Ok(max_stacks)
|
|
}
|
|
|
|
fn verify_function(
|
|
code: &[u8],
|
|
func: &FunctionMeta,
|
|
func_idx: usize,
|
|
all_functions: &[FunctionMeta],
|
|
layouts: &[FunctionLayout],
|
|
) -> Result<u16, VerifierError> {
|
|
let func_start = func.code_offset as usize;
|
|
// Use precomputed canonical range end
|
|
let func_end = layouts.get(func_idx).map(|l| l.end).unwrap_or_else(|| code.len());
|
|
|
|
if func_start > code.len() || func_end > code.len() || func_start > func_end {
|
|
return Err(VerifierError::FunctionOutOfBounds {
|
|
func_idx,
|
|
start: func_start,
|
|
end: func_end,
|
|
code_len: code.len(),
|
|
});
|
|
}
|
|
|
|
let func_code = &code[func_start..func_end];
|
|
|
|
// Funções vazias (sem qualquer byte de código) são consideradas válidas no verificador.
|
|
// Elas não consomem nem produzem valores na pilha e não possuem fluxo interno.
|
|
// Observação: se uma função vazia for chamada em tempo de execução e retorno/efeitos
|
|
// forem esperados, caberá ao gerador de código/linker impedir tal situação.
|
|
if func_code.is_empty() {
|
|
return Ok(0);
|
|
}
|
|
|
|
// First pass: find all valid instruction boundaries
|
|
let mut valid_pc = HashSet::new();
|
|
let mut pc = 0;
|
|
while pc < func_code.len() {
|
|
valid_pc.insert(pc);
|
|
let instr = decode_next(pc, func_code).map_err(|e| match e {
|
|
DecodeError::UnknownOpcode { pc: _, opcode } => {
|
|
VerifierError::UnknownOpcode { pc: func_start + pc, opcode }
|
|
}
|
|
DecodeError::TruncatedOpcode { pc: _ } => {
|
|
VerifierError::TruncatedOpcode { pc: func_start + pc }
|
|
}
|
|
DecodeError::TruncatedImmediate { pc: _, opcode, need, have } => {
|
|
VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have }
|
|
}
|
|
DecodeError::ImmediateSizeMismatch { pc: _, opcode, expected, actual } => {
|
|
VerifierError::TruncatedImmediate {
|
|
pc: func_start + pc,
|
|
opcode,
|
|
need: expected,
|
|
have: actual,
|
|
}
|
|
}
|
|
})?;
|
|
pc = instr.next_pc;
|
|
}
|
|
|
|
if pc != func_code.len() {
|
|
return Err(VerifierError::TrailingBytes { func_idx, at_pc: func_start + pc });
|
|
}
|
|
|
|
let mut stack_height_in: HashMap<usize, u16> = HashMap::new();
|
|
let mut worklist = VecDeque::new();
|
|
let mut max_stack: u16 = 0;
|
|
|
|
// Start from function entry
|
|
stack_height_in.insert(0, 0);
|
|
worklist.push_back(0);
|
|
|
|
while let Some(pc) = worklist.pop_front() {
|
|
let in_height = *stack_height_in.get(&pc).unwrap();
|
|
let instr = decode_next(pc, func_code).unwrap(); // Guaranteed to succeed due to first pass
|
|
let spec = instr.opcode.spec();
|
|
|
|
// Resolve dynamic pops/pushes
|
|
let (pops, pushes) = match instr.opcode {
|
|
OpCode::PopN => {
|
|
let n = instr.imm_u32().unwrap() as u16;
|
|
(n, 0)
|
|
}
|
|
OpCode::Call => {
|
|
let func_id = instr.imm_u32().unwrap();
|
|
let callee = all_functions.get(func_id as usize).ok_or_else(|| {
|
|
VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id }
|
|
})?;
|
|
(callee.param_slots, callee.return_slots)
|
|
}
|
|
OpCode::Ret => (func.return_slots, 0),
|
|
OpCode::Syscall => {
|
|
let id = instr.imm_u32().unwrap();
|
|
let syscall = Syscall::from_u32(id).ok_or_else(|| {
|
|
VerifierError::InvalidSyscallId { pc: func_start + pc, id }
|
|
})?;
|
|
(syscall.args_count() as u16, syscall.results_count() as u16)
|
|
}
|
|
_ => (spec.pops, spec.pushes),
|
|
};
|
|
|
|
if in_height < pops {
|
|
return Err(VerifierError::StackUnderflow {
|
|
pc: func_start + pc,
|
|
opcode: instr.opcode,
|
|
});
|
|
}
|
|
|
|
let out_height = in_height - pops + pushes;
|
|
max_stack = max_stack.max(out_height);
|
|
|
|
if instr.opcode == OpCode::Ret {
|
|
if in_height != func.return_slots {
|
|
return Err(VerifierError::BadRetStackHeight {
|
|
pc: func_start + pc,
|
|
height: in_height,
|
|
expected: func.return_slots,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Propagate to successors
|
|
if spec.is_branch {
|
|
// Canonical contract: branch immediate is RELATIVE to function start.
|
|
let target_rel = instr.imm_u32().unwrap() as usize;
|
|
let func_end_abs =
|
|
layouts.get(func_idx).map(|l| l.end).unwrap_or_else(|| code.len());
|
|
let func_len = func_end_abs - func_start;
|
|
|
|
if target_rel > func_len {
|
|
// Mandatory structured diagnostic for InvalidJumpTarget
|
|
let pc_abs = func_start + pc;
|
|
let opcode = instr.opcode;
|
|
let imm_raw = instr.imm_u32().unwrap();
|
|
let func_start_abs = func_start;
|
|
let func_end_abs_log = func_end_abs;
|
|
let target_abs_expected = func_start_abs + target_rel;
|
|
let is_boundary_target_rel = valid_pc.contains(&target_rel);
|
|
eprintln!(
|
|
"[VERIFIER] invalid jump: pc_abs={} opcode={:?} imm_raw={} func=F{} start={} end={} len={} target_rel={} target_abs_expected={} boundary_rel={}",
|
|
pc_abs,
|
|
opcode,
|
|
imm_raw,
|
|
func_idx,
|
|
func_start_abs,
|
|
func_end_abs_log,
|
|
func_len,
|
|
target_rel,
|
|
target_abs_expected,
|
|
is_boundary_target_rel
|
|
);
|
|
return Err(VerifierError::InvalidJumpTarget {
|
|
pc: pc_abs,
|
|
target: target_abs_expected,
|
|
});
|
|
}
|
|
|
|
if target_rel == func_len {
|
|
// salto para o fim da função
|
|
if out_height != func.return_slots {
|
|
return Err(VerifierError::BadRetStackHeight {
|
|
pc: func_start + pc,
|
|
height: out_height,
|
|
expected: func.return_slots,
|
|
});
|
|
}
|
|
// caminho termina aqui
|
|
} else {
|
|
if !valid_pc.contains(&target_rel) {
|
|
return Err(VerifierError::JumpToMidInstruction {
|
|
pc: func_start + pc,
|
|
target: func_start + target_rel,
|
|
});
|
|
}
|
|
|
|
if let Some(&existing_height) = stack_height_in.get(&target_rel) {
|
|
if existing_height != out_height {
|
|
return Err(VerifierError::StackMismatchJoin {
|
|
pc: func_start + pc,
|
|
target: func_start + target_rel,
|
|
height_in: out_height,
|
|
height_target: existing_height,
|
|
});
|
|
}
|
|
} else {
|
|
stack_height_in.insert(target_rel, out_height);
|
|
worklist.push_back(target_rel);
|
|
}
|
|
}
|
|
}
|
|
|
|
if !spec.is_terminator {
|
|
let next_pc = instr.next_pc;
|
|
let func_len = layouts.get(func_idx).map(|l| l.end - l.start).unwrap_or_else(|| 0);
|
|
if next_pc < func_len {
|
|
if let Some(&existing_height) = stack_height_in.get(&next_pc) {
|
|
if existing_height != out_height {
|
|
return Err(VerifierError::StackMismatchJoin {
|
|
pc: func_start + pc,
|
|
target: func_start + next_pc,
|
|
height_in: out_height,
|
|
height_target: existing_height,
|
|
});
|
|
}
|
|
} else {
|
|
stack_height_in.insert(next_pc, out_height);
|
|
worklist.push_back(next_pc);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(max_stack)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes an opcode in its 2-byte little-endian form.
    ///
    /// Going through `u16` (rather than `as u8` plus a hard-coded zero high byte) keeps the
    /// encoding correct for any opcode value and consistent across all tests.
    fn op(opcode: OpCode) -> [u8; 2] {
        (opcode as u16).to_le_bytes()
    }

    /// Encodes an opcode followed by a little-endian `u32` immediate (6 bytes total).
    fn op_u32(opcode: OpCode, imm: u32) -> Vec<u8> {
        let mut bytes = op(opcode).to_vec();
        bytes.extend_from_slice(&imm.to_le_bytes());
        bytes
    }

    #[test]
    fn test_verifier_underflow() {
        // Add (2 bytes) on an empty stack.
        let code = op(OpCode::Add).to_vec();
        let functions = vec![FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Add }));
    }

    #[test]
    fn test_verifier_dup_underflow() {
        // Dup (2 bytes) on an empty stack.
        let code = op(OpCode::Dup).to_vec();
        let functions = vec![FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Dup }));
    }

    #[test]
    fn test_verifier_invalid_jmp_target() {
        // Jmp (2 bytes) + 100u32 (4 bytes): target lies far beyond the 6-byte function.
        let code = op_u32(OpCode::Jmp, 100);
        let functions = vec![FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::InvalidJumpTarget { pc: 0, target: 100 }));
    }

    #[test]
    fn test_verifier_jmp_to_mid_instr() {
        // 0: PushI32 42 (2 + 4 bytes)
        // 6: Jmp 1      (lands in the middle of PushI32)
        let mut code = op_u32(OpCode::PushI32, 42);
        code.extend(op_u32(OpCode::Jmp, 1));

        let functions = vec![FunctionMeta { code_offset: 0, code_len: 12, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::JumpToMidInstruction { pc: 6, target: 1 }));
    }

    #[test]
    fn test_verifier_jump_to_end_ok() {
        // Single-instruction function where Jmp targets exactly func_len (end-exclusive).
        // Encoding: [Jmp][u32 imm], with imm == total function length (6 bytes).
        let code = op_u32(OpCode::Jmp, 6);

        let functions = vec![FunctionMeta {
            code_offset: 0,
            code_len: 6,
            return_slots: 0,
            ..Default::default()
        }];

        let res = Verifier::verify(&code, &functions).unwrap();
        // No stack usage; max stack remains 0.
        assert_eq!(res[0], 0);
    }

    #[test]
    fn test_verifier_truncation_opcode() {
        // Only the first byte of the 2-byte opcode is present.
        let code = vec![op(OpCode::PushI32)[0]];
        let functions = vec![FunctionMeta { code_offset: 0, code_len: 1, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::TruncatedOpcode { pc: 0 }));
    }

    #[test]
    fn test_verifier_truncation_immediate() {
        let mut code = op(OpCode::PushI32).to_vec();
        code.push(0x42); // Only 1 byte of the 4-byte immediate
        let functions = vec![FunctionMeta { code_offset: 0, code_len: 3, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(
            res,
            Err(VerifierError::TruncatedImmediate {
                pc: 0,
                opcode: OpCode::PushI32,
                need: 4,
                have: 1
            })
        );
    }

    #[test]
    fn test_verifier_stack_mismatch_join() {
        // Two reachable paths meet at offset 27 with different stack heights:
        //  0: PushBool true
        //  3: JmpIfTrue 15
        //  9: Jmp 27
        // 15: PushI32 1
        // 21: Jmp 27
        // 27: Nop
        let mut code = op(OpCode::PushBool).to_vec();
        code.push(1); //  0: PushBool (3 bytes)
        code.extend(op_u32(OpCode::JmpIfTrue, 15)); //  3: JmpIfTrue (6 bytes)
        code.extend(op_u32(OpCode::Jmp, 27)); //  9: Jmp (6 bytes)
        code.extend(op_u32(OpCode::PushI32, 1)); // 15: PushI32 (6 bytes)
        code.extend(op_u32(OpCode::Jmp, 27)); // 21: Jmp (6 bytes)
        code.extend_from_slice(&op(OpCode::Nop)); // 27: Nop (2 bytes)

        let functions = vec![FunctionMeta { code_offset: 0, code_len: 29, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);

        // Path 0->3->9->27:      height 1-1+0 = 0.
        // Path 0->3->15->21->27: height 1-1+1 = 1.
        // Mismatch at 27: 0 vs 1.
        assert_eq!(
            res,
            Err(VerifierError::StackMismatchJoin {
                pc: 21,
                target: 27,
                height_in: 1,
                height_target: 0
            })
        );
    }

    #[test]
    fn test_verifier_bad_ret_height() {
        // 0: PushI32 1 (6 bytes)
        // 6: Ret       (2 bytes)
        let mut code = op_u32(OpCode::PushI32, 1);
        code.extend_from_slice(&op(OpCode::Ret));

        let functions = vec![FunctionMeta {
            code_offset: 0,
            code_len: 8,
            return_slots: 0, // Expected 0, but the stack holds 1 value at Ret
            ..Default::default()
        }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::BadRetStackHeight { pc: 6, height: 1, expected: 0 }));
    }

    #[test]
    fn test_verifier_max_stack() {
        // PushI32 1
        // PushI32 2
        // Add
        // Ret
        let mut code = op_u32(OpCode::PushI32, 1);
        code.extend(op_u32(OpCode::PushI32, 2));
        code.extend_from_slice(&op(OpCode::Add));
        code.extend_from_slice(&op(OpCode::Ret));

        let functions = vec![FunctionMeta {
            code_offset: 0,
            code_len: 16,
            return_slots: 1,
            ..Default::default()
        }];
        let res = Verifier::verify(&code, &functions).unwrap();
        assert_eq!(res[0], 2);
    }

    #[test]
    fn test_verifier_invalid_syscall_id() {
        let code = op_u32(OpCode::Syscall, 0xDEADBEEF); // Unknown ID

        let functions = vec![FunctionMeta { code_offset: 0, code_len: 6, ..Default::default() }];
        let res = Verifier::verify(&code, &functions);
        assert_eq!(res, Err(VerifierError::InvalidSyscallId { pc: 0, id: 0xDEADBEEF }));
    }
}
|