diff --git a/crates/prometeu-bytecode/src/layout.rs b/crates/prometeu-bytecode/src/layout.rs index 6fac6dfd..4f48973b 100644 --- a/crates/prometeu-bytecode/src/layout.rs +++ b/crates/prometeu-bytecode/src/layout.rs @@ -6,57 +6,53 @@ use crate::decoder::decode_next; use crate::FunctionMeta; -/// Returns the absolute end (exclusive) of the function at `func_idx`, -/// defined as the minimum `code_offset` of any subsequent function, or -/// `code_len_total` if this is the last function. -#[inline] -pub fn function_end_from_next(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { - let start = functions.get(func_idx).map(|f| f.code_offset as usize).unwrap_or(0); - let mut end = code_len_total; - for (j, other) in functions.iter().enumerate() { - if j == func_idx { continue; } - let other_start = other.code_offset as usize; - if other_start > start && other_start < end { - end = other_start; +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FunctionLayout { + pub start: usize, + pub end: usize, // exclusive +} + +/// Precompute canonical [start, end) ranges for all functions. +/// +/// Contract: +/// - Ranges are computed by sorting functions by `code_offset` (stable), +/// then using the next function's start as the current end; the last +/// function ends at `code_len_total`. +/// - The returned vector is indexed by the original function indices. +pub fn compute_function_layouts(functions: &[FunctionMeta], code_len_total: usize) -> Vec { + // Build index array and sort by start offset (stable to preserve relative order). + let mut idxs: Vec = (0..functions.len()).collect(); + idxs.sort_by_key(|&i| functions[i].code_offset as usize); + + // Optional guard: offsets should be strictly increasing (duplicates are suspicious). + for w in idxs.windows(2) { + if let [a, b] = *w { + let sa = functions[a].code_offset as usize; + let sb = functions[b].code_offset as usize; + debug_assert!(sa < sb, "Function code_offset must be strictly increasing: {} vs {} (indices {} and {})", sa, sb, a, b); } } - end -} -/// Returns the length (in bytes) of the function at `func_idx`, using -/// the canonical definition: end = start of next function (exclusive), -/// or total code len if last. -#[inline] -pub fn function_len_from_next(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { - let start = functions.get(func_idx).map(|f| f.code_offset as usize).unwrap_or(0); - let end = function_end_from_next(functions, func_idx, code_len_total); - end.saturating_sub(start) -} - -/// Canonical function range [start, end) where `end` is the next function's -/// `code_offset` or `code_len_total` if this is the last function. -#[inline] -pub fn function_range(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> (usize, usize) { - let start = functions - .get(func_idx) - .map(|f| f.code_offset as usize) - .unwrap_or(0); - let end = function_end_from_next(functions, func_idx, code_len_total); - (start, end) -} - -/// Canonical function length (in bytes). -#[inline] -pub fn function_len(functions: &[FunctionMeta], func_idx: usize, code_len_total: usize) -> usize { - function_len_from_next(functions, func_idx, code_len_total) + let mut out = vec![FunctionLayout { start: 0, end: 0 }; functions.len()]; + for (pos, &i) in idxs.iter().enumerate() { + let start = functions[i].code_offset as usize; + let end = if pos + 1 < idxs.len() { + functions[idxs[pos + 1]].code_offset as usize + } else { + code_len_total + }; + out[i] = FunctionLayout { start, end }; + } + out } /// Recomputes all `code_len` values in place from the next function start /// (exclusive end), using the combined code buffer length for the last one. pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_len_total: usize) { + let layouts = compute_function_layouts(functions, code_len_total); for i in 0..functions.len() { - let start = functions[i].code_offset as usize; - let end = function_end_from_next(functions, i, code_len_total); + let start = layouts[i].start; + let end = layouts[i].end; functions[i].code_len = end.saturating_sub(start) as u32; } } @@ -64,9 +60,10 @@ pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_ /// Finds the function index that contains `pc_abs` (absolute), using the /// canonical ranges (end = next start, exclusive). Returns `None` if none. pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option { + let layouts = compute_function_layouts(functions, code_len_total); for i in 0..functions.len() { - let start = functions[i].code_offset as usize; - let end = function_end_from_next(functions, i, code_len_total); + let start = layouts[i].start; + let end = layouts[i].end; if pc_abs >= start && pc_abs < end { return Some(i); } @@ -91,7 +88,11 @@ pub fn lookup_function_by_pc(functions: &[FunctionMeta], code_len_total: usize, /// - Any decode error before reaching `rel_pc` yields `false` (invalid program). pub fn is_boundary(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, func_idx: usize, rel_pc: usize) -> bool { let (start, end) = match functions.get(func_idx) { - Some(_) => function_range(functions, func_idx, code_len_total), + Some(_) => { + let layouts = compute_function_layouts(functions, code_len_total); + let l = &layouts[func_idx]; + (l.start, l.end) + } None => return false, }; @@ -123,15 +124,21 @@ pub fn is_boundary(functions: &[FunctionMeta], code: &[u8], code_len_total: usiz /// is not within any function range or if decoding fails. pub fn is_boundary_abs(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, abs_pc: usize) -> bool { if let Some(func_idx) = lookup_function_by_pc(functions, code_len_total, abs_pc) { - let (start, _end) = function_range(functions, func_idx, code_len_total); + let layouts = compute_function_layouts(functions, code_len_total); + let (start, _end) = { + let l = &layouts[func_idx]; + (l.start, l.end) + }; let rel = abs_pc.saturating_sub(start); return is_boundary(functions, code, code_len_total, func_idx, rel); } // Not inside any function range; allow exact function starts/ends as // valid boundaries (e.g., last function end == total code len). + let layouts = compute_function_layouts(functions, code_len_total); for i in 0..functions.len() { - let (start, end) = function_range(functions, i, code_len_total); + let start = layouts[i].start; + let end = layouts[i].end; if abs_pc == start || abs_pc == end { return true; } @@ -230,7 +237,8 @@ mod tests { let code_len_total = code.len(); let mut funcs = build_funcs(&[0], None); recompute_function_lengths_in_place(&mut funcs, code_len_total); - let (start, end) = function_range(&funcs, 0, code_len_total); + let layouts = compute_function_layouts(&funcs, code_len_total); + let (start, end) = (layouts[0].start, layouts[0].end); assert_eq!(start, 0); assert_eq!(end, code_len_total); @@ -245,4 +253,21 @@ mod tests { // End must be a boundary too assert!(is_boundary(&funcs, &code, code_len_total, 0, end - start)); } + + #[test] + fn compute_function_layouts_end_is_next_start() { + // Synthetic functions with known offsets: 0, 10, 25; total_len = 40 + let funcs = build_funcs(&[0, 10, 25], None); + let layouts = compute_function_layouts(&funcs, 40); + + assert_eq!(layouts.len(), 3); + assert_eq!(layouts[0], FunctionLayout { start: 0, end: 10 }); + assert_eq!(layouts[1], FunctionLayout { start: 10, end: 25 }); + assert_eq!(layouts[2], FunctionLayout { start: 25, end: 40 }); + + for i in 0..3 { + let l = &layouts[i]; + assert_eq!(l.end - l.start, (funcs.get(i + 1).map(|n| n.code_offset as usize).unwrap_or(40)) - (funcs[i].code_offset as usize)); + } + } } diff --git a/crates/prometeu-compiler/src/building/linker.rs b/crates/prometeu-compiler/src/building/linker.rs index cd79a06e..9697de9c 100644 --- a/crates/prometeu-compiler/src/building/linker.rs +++ b/crates/prometeu-compiler/src/building/linker.rs @@ -335,10 +335,11 @@ impl Linker { // This preserves exact ends emitted by the compiler while still filling lengths for functions // that lack enriched annotations. let total_len = combined_code.len(); + let layouts = layout::compute_function_layouts(&combined_functions, total_len); for i in 0..combined_functions.len() { if !has_precise_len.get(i).copied().unwrap_or(false) { - let start = combined_functions[i].code_offset as usize; - let end = layout::function_end_from_next(&combined_functions, i, total_len); + let start = layouts[i].start; + let end = layouts[i].end; combined_functions[i].code_len = end.saturating_sub(start) as u32; } } diff --git a/crates/prometeu-vm/src/verifier.rs b/crates/prometeu-vm/src/verifier.rs index 899f0534..725159d2 100644 --- a/crates/prometeu-vm/src/verifier.rs +++ b/crates/prometeu-vm/src/verifier.rs @@ -27,16 +27,24 @@ pub struct Verifier; impl Verifier { pub fn verify(code: &[u8], functions: &[FunctionMeta]) -> Result, VerifierError> { let mut max_stacks = Vec::with_capacity(functions.len()); + // Precompute function [start, end) ranges once for O(1) lookups + let layouts = layout::compute_function_layouts(functions, code.len()); for (i, func) in functions.iter().enumerate() { - max_stacks.push(Self::verify_function(code, func, i, functions)?); + max_stacks.push(Self::verify_function(code, func, i, functions, &layouts)?); } Ok(max_stacks) } - fn verify_function(code: &[u8], func: &FunctionMeta, func_idx: usize, all_functions: &[FunctionMeta]) -> Result { + fn verify_function( + code: &[u8], + func: &FunctionMeta, + func_idx: usize, + all_functions: &[FunctionMeta], + layouts: &[layout::FunctionLayout], + ) -> Result { let func_start = func.code_offset as usize; - // Use o cálculo canônico compartilhado com o compiler/linker - let func_end = layout::function_end_from_next(all_functions, func_idx, code.len()); + // Use precomputed canonical range end + let func_end = layouts.get(func_idx).map(|l| l.end).unwrap_or_else(|| code.len()); if func_start > code.len() || func_end > code.len() || func_start > func_end { return Err(VerifierError::FunctionOutOfBounds { @@ -135,7 +143,7 @@ impl Verifier { if spec.is_branch { // Canonical contract: branch immediate is RELATIVE to function start. let target_rel = instr.imm_u32().unwrap() as usize; - let func_end_abs = layout::function_end_from_next(all_functions, func_idx, code.len()); + let func_end_abs = layouts.get(func_idx).map(|l| l.end).unwrap_or_else(|| code.len()); let func_len = func_end_abs - func_start; if target_rel > func_len { @@ -187,7 +195,7 @@ impl Verifier { if !spec.is_terminator { let next_pc = instr.next_pc; - let func_len = layout::function_len_from_next(all_functions, func_idx, code.len()); + let func_len = layouts.get(func_idx).map(|l| l.end - l.start).unwrap_or_else(|| 0); if next_pc < func_len { if let Some(&existing_height) = stack_height_in.get(&next_pc) { if existing_height != out_height { diff --git a/test-cartridges/canonical/golden/program.pbc b/test-cartridges/canonical/golden/program.pbc index 5a70490d..3646ac85 100644 Binary files a/test-cartridges/canonical/golden/program.pbc and b/test-cartridges/canonical/golden/program.pbc differ