//! Shared bytecode layout utilities, used by both compiler (emitter/linker) //! and the VM (verifier/loader). This ensures a single source of truth for //! how function ranges, instruction boundaries, and pc→function lookups are //! interpreted post-link. use crate::decoder::decode_next; use crate::FunctionMeta; /// Returns the absolute end (exclusive) of the function at `func_idx`, /// defined as the minimum `code_offset` of any subsequent function, or /// `code_len_total` if this is the last function. #[inline] pub fn function_end_from_next(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { let start = functions.get(func_idx).map(|f| f.code_offset as usize).unwrap_or(0); let mut end = code_len_total; for (j, other) in functions.iter().enumerate() { if j == func_idx { continue; } let other_start = other.code_offset as usize; if other_start > start && other_start < end { end = other_start; } } end } /// Returns the length (in bytes) of the function at `func_idx`, using /// the canonical definition: end = start of next function (exclusive), /// or total code len if last. #[inline] pub fn function_len_from_next(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { let start = functions.get(func_idx).map(|f| f.code_offset as usize).unwrap_or(0); let end = function_end_from_next(functions, func_idx, code_len_total); end.saturating_sub(start) } /// Canonical function range [start, end) where `end` is the next function's /// `code_offset` or `code_len_total` if this is the last function. #[inline] pub fn function_range(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> (usize, usize) { let start = functions .get(func_idx) .map(|f| f.code_offset as usize) .unwrap_or(0); let end = function_end_from_next(functions, func_idx, code_len_total); (start, end) } /// Canonical function length (in bytes). #[inline] pub fn function_len(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { function_len_from_next(functions, func_idx, code_len_total) } /// Recomputes all `code_len` values in place from the next function start /// (exclusive end), using the combined code buffer length for the last one. pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_len_total: usize) { for i in 0..functions.len() { let start = functions[i].code_offset as usize; let end = function_end_from_next(functions, i, code_len_total); functions[i].code_len = end.saturating_sub(start) as u32; } } /// Finds the function index that contains `pc_abs` (absolute), using the /// canonical ranges (end = next start, exclusive). Returns `None` if none. pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option { for i in 0..functions.len() { let start = functions[i].code_offset as usize; let end = function_end_from_next(functions, i, code_len_total); if pc_abs >= start && pc_abs < end { return Some(i); } } None } /// Alias: canonical function lookup by absolute PC. #[inline] pub fn lookup_function_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option { function_index_by_pc(functions, code_len_total, pc_abs) } /// Returns true if `rel_pc` (relative to the function start) is a valid /// instruction boundary as determined by the canonical decoder. /// /// Contract: /// - `rel_pc == 0` is always a boundary if `func_idx` is valid. /// - Boundaries are computed by stepping with `decoder::decode_next` from the /// function start up to (and possibly past) `rel_pc` but never beyond the /// function exclusive end. /// - Any decode error before reaching `rel_pc` yields `false` (invalid program). pub fn is_boundary(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, func_idx: usize, rel_pc: usize) -> bool { let (start, end) = match functions.get(func_idx) { Some(_) => function_range(functions, func_idx, code_len_total), None => return false, }; let func_len = end.saturating_sub(start); if rel_pc == 0 { return true; } if rel_pc > func_len { return false; } let target = start + rel_pc; let mut pc = start; while pc < end { match decode_next(pc, code) { Ok(di) => { let next = di.next_pc; if next > end { return false; } if next == target { return true; } if next <= pc { return false; } // must make progress pc = next; if pc > target { return false; } } Err(_) => return false, } } // If we reached end without matching `target`, only boundary is exact end target == end } /// Returns true if `abs_pc` is a valid instruction boundary for the function /// containing it, according to the canonical decoder. Returns false if `abs_pc` /// is not within any function range or if decoding fails. pub fn is_boundary_abs(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, abs_pc: usize) -> bool { if let Some(func_idx) = lookup_function_by_pc(functions, code_len_total, abs_pc) { let (start, _end) = function_range(functions, func_idx, code_len_total); let rel = abs_pc.saturating_sub(start); return is_boundary(functions, code, code_len_total, func_idx, rel); } // Not inside any function range; allow exact function starts/ends as // valid boundaries (e.g., last function end == total code len). for i in 0..functions.len() { let (start, end) = function_range(functions, i, code_len_total); if abs_pc == start || abs_pc == end { return true; } } false } #[cfg(test)] mod tests { use super::*; use crate::asm::{assemble, Asm, Operand}; use crate::opcode::OpCode; fn build_funcs(offsets: &[usize], lens: Option<&[usize]>) -> Vec { let mut v = Vec::new(); for (i, off) in offsets.iter().copied().enumerate() { let len_u32 = lens.and_then(|ls| ls.get(i).copied()).unwrap_or(0) as u32; v.push(FunctionMeta { code_offset: off as u32, code_len: len_u32, param_slots: 0, local_slots: 0, return_slots: 0, max_stack_slots: 0, }); } v } #[test] fn boundaries_known_sequence() { // Build a function with mixed immediate sizes: // [NOP][PUSH_I32 4][PUSH_I64 8][PUSH_BOOL 1][HALT] let code = assemble(&[ Asm::Op(OpCode::Nop, vec![]), Asm::Op(OpCode::PushI32, vec![Operand::I32(123)]), Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]), Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)]), Asm::Op(OpCode::Halt, vec![]), ]).unwrap(); // Single function starting at 0 let code_len_total = code.len(); let mut funcs = build_funcs(&[0], None); recompute_function_lengths_in_place(&mut funcs, code_len_total); // Expected boundaries (relative): 0, 2, 8, 18, 21, 23 // Explanation per instruction size: opcode(2) + imm let expected = [0usize, 2, 8, 18, 21, 23]; for rel in 0..=expected.last().copied().unwrap() { let should_be_boundary = expected.contains(&rel); assert_eq!( is_boundary(&funcs, &code, code_len_total, 0, rel), should_be_boundary, "rel_pc={} boundary mismatch", rel ); } // Check absolute variant too for rel in expected { let abs = rel; assert!(is_boundary_abs(&funcs, &code, code_len_total, abs)); } } #[test] fn fuzz_table_monotonic_and_boundaries() { // Build a pseudo-random but valid sequence using a simple pattern over opcodes // to avoid invalid encodings. We cycle through a few known-good opcodes. let ops = [ OpCode::Nop, OpCode::PushI32, OpCode::PushBool, OpCode::PushI64, OpCode::Pop, OpCode::Ret, ]; let mut prog = Vec::new(); for i in 0..50 { let op = ops[i % ops.len()]; let asm = match op { OpCode::Nop => Asm::Op(OpCode::Nop, vec![]), OpCode::PushI32 => Asm::Op(OpCode::PushI32, vec![Operand::I32(i as i32)]), OpCode::PushBool => Asm::Op(OpCode::PushBool, vec![Operand::Bool(i % 2 == 0)]), OpCode::PushI64 => Asm::Op(OpCode::PushI64, vec![Operand::I64(i as i64)]), OpCode::Pop => Asm::Op(OpCode::Pop, vec![]), OpCode::Ret => Asm::Op(OpCode::Ret, vec![]), _ => unreachable!(), }; prog.push(asm); } let code = assemble(&prog).unwrap(); let code_len_total = code.len(); let mut funcs = build_funcs(&[0], None); recompute_function_lengths_in_place(&mut funcs, code_len_total); let (start, end) = function_range(&funcs, 0, code_len_total); assert_eq!(start, 0); assert_eq!(end, code_len_total); // Walk with decoder and verify boundaries are accepted let mut pc = start; while pc < end { assert!(is_boundary_abs(&funcs, &code, code_len_total, pc)); let di = decode_next(pc, &code).expect("decode_next"); assert!(di.next_pc > pc && di.next_pc <= end); pc = di.next_pc; } // End must be a boundary too assert!(is_boundary(&funcs, &code, code_len_total, 0, end - start)); } }