prometeu-runtime/crates/prometeu-bytecode/src/layout.rs

//! Shared bytecode layout utilities, used by both compiler (emitter/linker)
//! and the VM (verifier/loader). This ensures a single source of truth for
//! how function ranges, instruction boundaries, and pc→function lookups are
//! interpreted post-link.

use crate::decoder::decode_next;
use crate::FunctionMeta;

/// Byte range occupied by one function inside the combined code buffer.
///
/// The range is half-open: `start` is the first byte that belongs to the
/// function and `end` is one past its last byte.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FunctionLayout {
    /// Offset of the first byte of the function (inclusive).
    pub start: usize,
    /// Offset one past the last byte of the function (exclusive).
    pub end: usize,
}

/// Derives the canonical half-open `[start, end)` byte range of every function.
///
/// Contract:
/// - Functions are ordered by `code_offset` with a stable sort. Each function
///   ends where the next one (in that order) starts; the function with the
///   highest offset ends at `code_len_total`.
/// - Entry `i` of the result describes `functions[i]`, i.e. the output keeps
///   the caller's indexing regardless of the offset order.
/// - Offsets are expected to be strictly increasing once sorted. This is only
///   checked with `debug_assert!`; when the check is compiled out, a function
///   that shares its offset with a later one in sort order receives an empty
///   range (`start == end`).
/// - No clamping is applied: if `code_len_total` is smaller than the highest
///   offset, that function's `end` is smaller than its `start`.
pub fn compute_function_layouts(
    functions: &[FunctionMeta],
    code_len_total: usize,
) -> Vec<FunctionLayout> {
    let offset_of = |idx: usize| functions[idx].code_offset as usize;

    // Function indices ordered by where their code begins (ties keep input order).
    let mut order: Vec<usize> = (0..functions.len()).collect();
    order.sort_by_key(|&idx| offset_of(idx));

    // Sanity check on neighbouring entries: equal offsets are suspicious.
    for pair in order.windows(2) {
        let (a, b) = (pair[0], pair[1]);
        let (sa, sb) = (offset_of(a), offset_of(b));
        debug_assert!(sa < sb, "Function code_offset must be strictly increasing: {} vs {} (indices {} and {})", sa, sb, a, b);
    }

    // The end of each function is the start of its successor in sorted order;
    // the final one is closed off by the total code length.
    let ends = order
        .iter()
        .skip(1)
        .map(|&next| offset_of(next))
        .chain(std::iter::once(code_len_total));

    let mut layouts = vec![FunctionLayout { start: 0, end: 0 }; functions.len()];
    for (&idx, end) in order.iter().zip(ends) {
        layouts[idx] = FunctionLayout { start: offset_of(idx), end };
    }
    layouts
}

/// Overwrites every `code_len` with the length of the function's canonical
/// range, as produced by [`compute_function_layouts`]: the distance from its
/// own start to the next function's start, or to `code_len_total` for the
/// function with the highest offset.
///
/// A range whose end lies before its start yields a length of 0.
pub fn recompute_function_lengths_in_place(functions: &mut [FunctionMeta], code_len_total: usize) {
    let layouts = compute_function_layouts(functions, code_len_total);
    for (meta, layout) in functions.iter_mut().zip(&layouts) {
        // NOTE: `as u32` truncates silently if the length does not fit in 32 bits.
        meta.code_len = layout.end.saturating_sub(layout.start) as u32;
    }
}

/// Looks up which function owns the absolute program counter `pc_abs`.
///
/// Ownership is decided by the canonical half-open ranges from
/// [`compute_function_layouts`] (a function ends where the next one starts).
/// The lowest function index whose range contains `pc_abs` is returned, or
/// `None` when no range contains it.
pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option<usize> {
    compute_function_layouts(functions, code_len_total)
        .iter()
        .position(|layout| layout.start <= pc_abs && pc_abs < layout.end)
}

/// Alias for [`function_index_by_pc`]: canonical function lookup by absolute PC.
///
/// Forwards all arguments unchanged and returns the index of the function
/// whose canonical range contains `pc_abs`, or `None` if there is none.
#[inline]
pub fn lookup_function_by_pc(functions: &[FunctionMeta], code_len_total: usize, pc_abs: usize) -> Option<usize> {
    function_index_by_pc(functions, code_len_total, pc_abs)
}

/// Tells whether `rel_pc`, an offset relative to the start of function
/// `func_idx`, falls exactly on an instruction boundary according to the
/// canonical decoder.
///
/// Contract:
/// - An out-of-range `func_idx` yields `false`.
/// - `rel_pc == 0` is always a boundary for a valid `func_idx`.
/// - A `rel_pc` larger than the function's length yields `false`.
/// - Otherwise instructions are decoded one after another with
///   `decoder::decode_next`, starting at the function start. The answer is
///   `true` as soon as an instruction ends exactly at `rel_pc`, and `false`
///   if an instruction steps over `rel_pc`, runs past the function's
///   exclusive end, or fails to advance.
/// - Any decode error before reaching `rel_pc` yields `false` (invalid program).
pub fn is_boundary(
    functions: &[FunctionMeta],
    code: &[u8],
    code_len_total: usize,
    func_idx: usize,
    rel_pc: usize,
) -> bool {
    // Reject unknown function indices before doing any layout work.
    if func_idx >= functions.len() {
        return false;
    }
    let (start, end) = {
        let layouts = compute_function_layouts(functions, code_len_total);
        (layouts[func_idx].start, layouts[func_idx].end)
    };

    if rel_pc == 0 {
        return true;
    }
    if rel_pc > end.saturating_sub(start) {
        return false;
    }

    // `rel_pc` is at most `end - start` here, so `target` never exceeds `end`.
    let target = start + rel_pc;
    let mut cursor = start;
    while cursor < end {
        let next = match decode_next(cursor, code) {
            Ok(decoded) => decoded.next_pc,
            Err(_) => return false,
        };
        if next > end {
            // The instruction spills out of the function.
            return false;
        }
        if next == target {
            return true;
        }
        if next <= cursor || next > target {
            // Either the decoder made no progress or it jumped over `target`,
            // which means `target` lies inside an instruction.
            return false;
        }
        cursor = next;
    }
    // The walk ran out of function without landing on `target`; the only
    // offset that can still count as a boundary is the exact end.
    target == end
}

/// Tells whether the absolute offset `abs_pc` is a valid instruction boundary.
///
/// - If `abs_pc` lies inside the canonical range of some function, the answer
///   is that of [`is_boundary`] for that function, using the offset relative
///   to the function's start (so decode failures yield `false`).
/// - If `abs_pc` lies in no function range, it is still accepted when it
///   coincides with the start or the exclusive end of any function (e.g. the
///   last function's end, which equals the total code length).
/// - Otherwise the result is `false`.
///
/// The layout table is computed once here and shared by the containment
/// search and the fallback check, instead of being rebuilt for each step.
pub fn is_boundary_abs(functions: &[FunctionMeta], code: &[u8], code_len_total: usize, abs_pc: usize) -> bool {
    let layouts = compute_function_layouts(functions, code_len_total);

    // Same rule as `function_index_by_pc`: first function whose half-open
    // range contains `abs_pc`.
    let owner = layouts
        .iter()
        .position(|layout| layout.start <= abs_pc && abs_pc < layout.end);

    match owner {
        Some(func_idx) => {
            // Containment guarantees `abs_pc >= start`, so this cannot underflow.
            let rel = abs_pc - layouts[func_idx].start;
            is_boundary(functions, code, code_len_total, func_idx, rel)
        }
        // Not inside any function range; allow exact function starts/ends as
        // valid boundaries (e.g., last function end == total code len).
        None => layouts
            .iter()
            .any(|layout| abs_pc == layout.start || abs_pc == layout.end),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::asm::{assemble, Asm, Operand};
    use crate::opcode::OpCode;

    /// Creates one `FunctionMeta` per entry of `offsets`, with all slot counts
    /// zeroed. `lens`, when given, supplies `code_len` by position; positions
    /// it does not cover get a `code_len` of 0.
    fn build_funcs(offsets: &[usize], lens: Option<&[usize]>) -> Vec<FunctionMeta> {
        offsets
            .iter()
            .enumerate()
            .map(|(i, &off)| FunctionMeta {
                code_offset: off as u32,
                code_len: lens.and_then(|ls| ls.get(i).copied()).unwrap_or(0) as u32,
                param_slots: 0,
                local_slots: 0,
                return_slots: 0,
                max_stack_slots: 0,
            })
            .collect()
    }

    /// Checks every relative offset of a small hand-written program against
    /// the list of offsets where an instruction is known to start or end.
    #[test]
    fn boundaries_known_sequence() {
        // Instructions with differently sized immediates:
        // [NOP][PUSH_I32 4][PUSH_I64 8][PUSH_BOOL 1][HALT]
        let program = [
            Asm::Op(OpCode::Nop, vec![]),
            Asm::Op(OpCode::PushI32, vec![Operand::I32(123)]),
            Asm::Op(OpCode::PushI64, vec![Operand::I64(42)]),
            Asm::Op(OpCode::PushBool, vec![Operand::Bool(true)]),
            Asm::Op(OpCode::Halt, vec![]),
        ];
        let code = assemble(&program).unwrap();
        let code_len_total = code.len();

        // The whole buffer is one function starting at offset 0.
        let mut funcs = build_funcs(&[0], None);
        recompute_function_lengths_in_place(&mut funcs, code_len_total);

        // Each instruction takes opcode(2) + immediate bytes, which puts the
        // relative boundaries at 0, 2, 8, 18, 21 and 23.
        let expected = [0usize, 2, 8, 18, 21, 23];
        let last = *expected.last().unwrap();
        for rel in 0..=last {
            let should_be_boundary = expected.contains(&rel);
            assert_eq!(
                is_boundary(&funcs, &code, code_len_total, 0, rel),
                should_be_boundary,
                "rel_pc={} boundary mismatch",
                rel
            );
        }

        // The function starts at 0, so relative and absolute offsets coincide.
        for &abs_pc in expected.iter() {
            assert!(is_boundary_abs(&funcs, &code, code_len_total, abs_pc));
        }
    }

    /// Walks a longer, deterministically generated program with the decoder
    /// and checks that every instruction start is reported as a boundary.
    #[test]
    fn fuzz_table_monotonic_and_boundaries() {
        // Only known-good opcodes are used, cycled in a fixed pattern, so the
        // assembled program never contains an invalid encoding.
        let ops = [
            OpCode::Nop,
            OpCode::PushI32,
            OpCode::PushBool,
            OpCode::PushI64,
            OpCode::Pop,
            OpCode::Ret,
        ];

        let prog: Vec<Asm> = (0..50usize)
            .map(|i| match ops[i % ops.len()] {
                OpCode::Nop => Asm::Op(OpCode::Nop, vec![]),
                OpCode::PushI32 => Asm::Op(OpCode::PushI32, vec![Operand::I32(i as i32)]),
                OpCode::PushBool => Asm::Op(OpCode::PushBool, vec![Operand::Bool(i % 2 == 0)]),
                OpCode::PushI64 => Asm::Op(OpCode::PushI64, vec![Operand::I64(i as i64)]),
                OpCode::Pop => Asm::Op(OpCode::Pop, vec![]),
                OpCode::Ret => Asm::Op(OpCode::Ret, vec![]),
                _ => unreachable!(),
            })
            .collect();

        let code = assemble(&prog).unwrap();
        let code_len_total = code.len();
        let mut funcs = build_funcs(&[0], None);
        recompute_function_lengths_in_place(&mut funcs, code_len_total);

        let layouts = compute_function_layouts(&funcs, code_len_total);
        let start = layouts[0].start;
        let end = layouts[0].end;
        assert_eq!(start, 0);
        assert_eq!(end, code_len_total);

        // Every offset the decoder stops at must be accepted as a boundary.
        let mut pc = start;
        while pc < end {
            assert!(is_boundary_abs(&funcs, &code, code_len_total, pc));
            let di = decode_next(pc, &code).expect("decode_next");
            assert!(di.next_pc > pc && di.next_pc <= end);
            pc = di.next_pc;
        }
        // The exclusive end of the function counts as a boundary as well.
        assert!(is_boundary(&funcs, &code, code_len_total, 0, end - start));
    }

    /// Checks that each function's range ends where the next one begins and
    /// that the last one ends at the total code length.
    #[test]
    fn compute_function_layouts_end_is_next_start() {
        // Three synthetic functions at offsets 0, 10 and 25 in 40 bytes of code.
        let funcs = build_funcs(&[0, 10, 25], None);
        let layouts = compute_function_layouts(&funcs, 40);

        let expected_ranges = [(0usize, 10usize), (10, 25), (25, 40)];
        assert_eq!(layouts.len(), 3);
        for (layout, &(start, end)) in layouts.iter().zip(expected_ranges.iter()) {
            assert_eq!(*layout, FunctionLayout { start, end });
        }

        // Each length must equal the gap to the next offset (or to 40 at the end).
        for (i, layout) in layouts.iter().enumerate() {
            let next_start = funcs.get(i + 1).map(|n| n.code_offset as usize).unwrap_or(40);
            assert_eq!(layout.end - layout.start, next_start - (funcs[i].code_offset as usize));
        }
    }
}