pr 04
This commit is contained in:
parent
c9b9f18b32
commit
1a8ec25c07
@ -1,7 +1,9 @@
|
|||||||
//! Shared bytecode layout utilities, used by both compiler (emitter/linker)
|
//! Shared bytecode layout utilities, used by both compiler (emitter/linker)
|
||||||
//! and the VM (verifier/loader). This ensures a single source of truth for
|
//! and the VM (verifier/loader). This ensures a single source of truth for
|
||||||
//! how function ranges and jump targets are interpreted post-link.
|
//! how function ranges, instruction boundaries, and pc→function lookups are
|
||||||
|
//! interpreted post-link.
|
||||||
|
|
||||||
|
use crate::decoder::decode_next;
|
||||||
use crate::FunctionMeta;
|
use crate::FunctionMeta;
|
||||||
|
|
||||||
/// Returns the absolute end (exclusive) of the function at `func_idx`,
|
/// Returns the absolute end (exclusive) of the function at `func_idx`,
|
||||||
@ -31,6 +33,24 @@ pub fn function_len_from_next(functions: &[FunctionMeta], func_idx: usize, code_
|
|||||||
end.saturating_sub(start)
|
end.saturating_sub(start)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Canonical function range [start, end) where `end` is the next function's
|
||||||
|
/// `code_offset` or `code_len_total` if this is the last function.
|
||||||
|
#[inline]
|
||||||
|
pub fn function_range(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> (usize, usize) {
|
||||||
|
let start = functions
|
||||||
|
.get(func_idx)
|
||||||
|
.map(|f| f.code_offset as usize)
|
||||||
|
.unwrap_or(0);
|
||||||
|
let end = function_end_from_next(functions, func_idx, code_len_total);
|
||||||
|
(start, end)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Canonical function length (in bytes).
|
||||||
|
#[inline]
|
||||||
|
pub fn function_len(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize {
|
||||||
|
function_len_from_next(functions, func_idx, code_len_total)
|
||||||
|
}
|
||||||
|
|
||||||
/// Recomputes all `code_len` values in place from the next function start
|
/// Recomputes all `code_len` values in place from the next function start
|
||||||
/// (exclusive end), using the combined code buffer length for the last one.
|
/// (exclusive end), using the combined code buffer length for the last one.
|
||||||
pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_len_total: usize) {
|
pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_len_total: usize) {
|
||||||
@ -53,3 +73,176 @@ pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, p
|
|||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Alias: canonical function lookup by absolute PC.
|
||||||
|
#[inline]
|
||||||
|
pub fn lookup_function_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option<usize> {
|
||||||
|
function_index_by_pc(functions, code_len_total, pc_abs)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if `rel_pc` (relative to the function start) is a valid
|
||||||
|
/// instruction boundary as determined by the canonical decoder.
|
||||||
|
///
|
||||||
|
/// Contract:
|
||||||
|
/// - `rel_pc == 0` is always a boundary if `func_idx` is valid.
|
||||||
|
/// - Boundaries are computed by stepping with `decoder::decode_next` from the
|
||||||
|
/// function start up to (and possibly past) `rel_pc` but never beyond the
|
||||||
|
/// function exclusive end.
|
||||||
|
/// - Any decode error before reaching `rel_pc` yields `false` (invalid program).
|
||||||
|
pub fn is_boundary(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, func_idx: usize, rel_pc: usize) -> bool {
|
||||||
|
let (start, end) = match functions.get(func_idx) {
|
||||||
|
Some(_) => function_range(functions, func_idx, code_len_total),
|
||||||
|
None => return false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let func_len = end.saturating_sub(start);
|
||||||
|
if rel_pc == 0 { return true; }
|
||||||
|
if rel_pc > func_len { return false; }
|
||||||
|
|
||||||
|
let target = start + rel_pc;
|
||||||
|
let mut pc = start;
|
||||||
|
while pc < end {
|
||||||
|
match decode_next(pc, code) {
|
||||||
|
Ok(di) => {
|
||||||
|
let next = di.next_pc;
|
||||||
|
if next > end { return false; }
|
||||||
|
if next == target { return true; }
|
||||||
|
if next <= pc { return false; } // must make progress
|
||||||
|
pc = next;
|
||||||
|
if pc > target { return false; }
|
||||||
|
}
|
||||||
|
Err(_) => return false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we reached end without matching `target`, only boundary is exact end
|
||||||
|
target == end
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if `abs_pc` is a valid instruction boundary for the function
|
||||||
|
/// containing it, according to the canonical decoder. Returns false if `abs_pc`
|
||||||
|
/// is not within any function range or if decoding fails.
|
||||||
|
pub fn is_boundary_abs(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, abs_pc: usize) -> bool {
|
||||||
|
if let Some(func_idx) = lookup_function_by_pc(functions, code_len_total, abs_pc) {
|
||||||
|
let (start, _end) = function_range(functions, func_idx, code_len_total);
|
||||||
|
let rel = abs_pc.saturating_sub(start);
|
||||||
|
return is_boundary(functions, code, code_len_total, func_idx, rel);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not inside any function range; allow exact function starts/ends as
|
||||||
|
// valid boundaries (e.g., last function end == total code len).
|
||||||
|
for i in 0..functions.len() {
|
||||||
|
let (start, end) = function_range(functions, i, code_len_total);
|
||||||
|
if abs_pc == start || abs_pc == end {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::asm::{assemble, Asm, Operand};
    use crate::opcode::OpCode;

    /// Creates one `FunctionMeta` per entry in `offsets`. `code_len` comes
    /// from the matching entry of `lens` when present and defaults to 0;
    /// every slot count is zero.
    fn build_funcs(offsets: &[usize], lens: Option<&[usize]>) -> Vec<FunctionMeta> {
        offsets
            .iter()
            .enumerate()
            .map(|(idx, &offset)| {
                let len = lens.and_then(|all| all.get(idx).copied()).unwrap_or(0);
                FunctionMeta {
                    code_offset: offset as u32,
                    code_len: len as u32,
                    param_slots: 0,
                    local_slots: 0,
                    return_slots: 0,
                    max_stack_slots: 0,
                }
            })
            .collect()
    }

    #[test]
    fn boundaries_known_sequence() {
        // One function mixing several immediate widths:
        // [NOP][PUSH_I32 4][PUSH_I64 8][PUSH_BOOL 1][HALT]
        let program = [
            Asm::Op(OpCode::Nop, vec![]),
            Asm::Op(OpCode::PushI32, vec![Operand::I32(123)]),
            Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]),
            Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)]),
            Asm::Op(OpCode::Halt, vec![]),
        ];
        let code = assemble(&program).unwrap();

        // The single function starts at offset 0 and spans the whole buffer.
        let code_len_total = code.len();
        let mut funcs = build_funcs(&[0], None);
        recompute_function_lengths_in_place(&mut funcs, code_len_total);

        // Each instruction is a 2-byte opcode followed by its immediate, so
        // the relative boundaries are 0, 2, 8, 18, 21 and 23.
        let expected = [0usize, 2, 8, 18, 21, 23];
        let last = *expected.last().unwrap();
        for rel in 0..=last {
            let should_be_boundary = expected.contains(&rel);
            assert_eq!(
                is_boundary(&funcs, &code, code_len_total, 0, rel),
                should_be_boundary,
                "rel_pc={} boundary mismatch",
                rel
            );
        }

        // The function starts at 0, so absolute and relative offsets coincide.
        for &abs in expected.iter() {
            assert!(is_boundary_abs(&funcs, &code, code_len_total, abs));
        }
    }

    #[test]
    fn fuzz_table_monotonic_and_boundaries() {
        // Deterministic pseudo-fuzz: cycle through a handful of opcodes that
        // are known to encode cleanly, so the program is always valid.
        let ops = [
            OpCode::Nop,
            OpCode::PushI32,
            OpCode::PushBool,
            OpCode::PushI64,
            OpCode::Pop,
            OpCode::Ret,
        ];

        let prog: Vec<_> = (0..50usize)
            .map(|i| match ops[i % ops.len()] {
                OpCode::Nop => Asm::Op(OpCode::Nop, vec![]),
                OpCode::PushI32 => Asm::Op(OpCode::PushI32, vec![Operand::I32(i as i32)]),
                OpCode::PushBool => Asm::Op(OpCode::PushBool, vec![Operand::Bool(i % 2 == 0)]),
                OpCode::PushI64 => Asm::Op(OpCode::PushI64, vec![Operand::I64(i as i64)]),
                OpCode::Pop => Asm::Op(OpCode::Pop, vec![]),
                OpCode::Ret => Asm::Op(OpCode::Ret, vec![]),
                _ => unreachable!(),
            })
            .collect();

        let code = assemble(&prog).unwrap();
        let code_len_total = code.len();
        let mut funcs = build_funcs(&[0], None);
        recompute_function_lengths_in_place(&mut funcs, code_len_total);

        let (start, end) = function_range(&funcs, 0, code_len_total);
        assert_eq!(start, 0);
        assert_eq!(end, code_len_total);

        // Every address the decoder visits must be accepted as a boundary,
        // and each step must advance without leaving the function.
        let mut cursor = start;
        while cursor < end {
            assert!(is_boundary_abs(&funcs, &code, code_len_total, cursor));
            let decoded = decode_next(cursor, &code).expect("decode_next");
            assert!(decoded.next_pc > cursor && decoded.next_pc <= end);
            cursor = decoded.next_pc;
        }

        // The exclusive end of the function is itself a boundary.
        assert!(is_boundary(&funcs, &code, code_len_total, 0, end - start));
    }
}
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user