From e784dab34ef76f98f89a3979b92e85e59979164e Mon Sep 17 00:00:00 2001 From: Nilton Constantino Date: Sat, 31 Jan 2026 17:23:29 +0000 Subject: [PATCH] pr 43 --- crates/prometeu-bytecode/src/opcode.rs | 6 +- crates/prometeu-bytecode/src/v0/mod.rs | 5 +- crates/prometeu-core/src/hardware/syscalls.rs | 9 + .../src/virtual_machine/bytecode/decoder.rs | 47 +++ .../src/virtual_machine/bytecode/mod.rs | 1 + .../src/virtual_machine/call_frame.rs | 1 + .../prometeu-core/src/virtual_machine/mod.rs | 6 + .../src/virtual_machine/opcode_spec.rs | 81 ++++ .../src/virtual_machine/program.rs | 12 +- .../src/virtual_machine/verifier.rs | 314 ++++++++++++++++ .../src/virtual_machine/virtual_machine.rs | 355 +++++++++++------- docs/specs/pbs/files/PRs para Junie.md | 42 --- 12 files changed, 688 insertions(+), 191 deletions(-) create mode 100644 crates/prometeu-core/src/virtual_machine/bytecode/decoder.rs create mode 100644 crates/prometeu-core/src/virtual_machine/bytecode/mod.rs create mode 100644 crates/prometeu-core/src/virtual_machine/opcode_spec.rs create mode 100644 crates/prometeu-core/src/virtual_machine/verifier.rs diff --git a/crates/prometeu-bytecode/src/opcode.rs b/crates/prometeu-bytecode/src/opcode.rs index f366fa83..efe010f6 100644 --- a/crates/prometeu-bytecode/src/opcode.rs +++ b/crates/prometeu-bytecode/src/opcode.rs @@ -145,9 +145,9 @@ pub enum OpCode { // --- 6.6 Functions --- - /// Calls a function at a specific address. - /// Operands: addr (u32), args_count (u32) - /// Stack: [arg0, arg1, ...] -> [return_value] + /// Calls a function by its index in the function table. + /// Operand: func_id (u32) + /// Stack: [arg0, arg1, ...] -> [return_slots...] Call = 0x50, /// Returns from the current function. /// Stack: [return_val] -> [return_val] diff --git a/crates/prometeu-bytecode/src/v0/mod.rs b/crates/prometeu-bytecode/src/v0/mod.rs index 72c93439..9f8a1486 100644 --- a/crates/prometeu-bytecode/src/v0/mod.rs +++ b/crates/prometeu-bytecode/src/v0/mod.rs @@ -250,7 +250,10 @@ fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> { OpCode::PushBool => { pos += 1; } - OpCode::Call | OpCode::Alloc => { + OpCode::Call => { + pos += 4; + } + OpCode::Alloc => { pos += 8; } _ => {} diff --git a/crates/prometeu-core/src/hardware/syscalls.rs b/crates/prometeu-core/src/hardware/syscalls.rs index 2a733ca8..4a9234c9 100644 --- a/crates/prometeu-core/src/hardware/syscalls.rs +++ b/crates/prometeu-core/src/hardware/syscalls.rs @@ -203,4 +203,13 @@ impl Syscall { Self::BankSlotInfo => 2, } } + + pub fn results_count(&self) -> usize { + match self { + Self::GfxClear565 => 0, + Self::InputPadSnapshot => 48, + Self::InputTouchSnapshot => 6, + _ => 1, + } + } } diff --git a/crates/prometeu-core/src/virtual_machine/bytecode/decoder.rs b/crates/prometeu-core/src/virtual_machine/bytecode/decoder.rs new file mode 100644 index 00000000..82ce773e --- /dev/null +++ b/crates/prometeu-core/src/virtual_machine/bytecode/decoder.rs @@ -0,0 +1,47 @@ +use prometeu_bytecode::opcode::OpCode; +use crate::virtual_machine::opcode_spec::{OpcodeSpec, OpCodeSpecExt}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DecodeError { + TruncatedOpcode { pc: usize }, + UnknownOpcode { pc: usize, opcode: u16 }, + TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize }, +} + +#[derive(Debug, Clone)] +pub struct DecodedInstr<'a> { + pub opcode: OpCode, + pub spec: OpcodeSpec, + pub imm: &'a [u8], + pub next_pc: usize, +} + +pub fn decode_at(rom: &[u8], pc: usize) -> Result, DecodeError> { + if pc + 2 > rom.len() { + return Err(DecodeError::TruncatedOpcode { pc }); + } + let opcode_val = u16::from_le_bytes([rom[pc], rom[pc+1]]); + let opcode = OpCode::try_from(opcode_val).map_err(|_| DecodeError::UnknownOpcode { pc, opcode: opcode_val })?; + let spec = opcode.spec(); + + let imm_start = pc + 2; + let imm_end = imm_start + spec.imm_bytes as usize; + + if imm_end > rom.len() { + return Err(DecodeError::TruncatedImmediate { + pc, + opcode, + need: spec.imm_bytes as usize, + have: rom.len().saturating_sub(imm_start) + }); + } + + let imm = &rom[imm_start..imm_end]; + + Ok(DecodedInstr { + opcode, + spec, + imm, + next_pc: imm_end, + }) +} diff --git a/crates/prometeu-core/src/virtual_machine/bytecode/mod.rs b/crates/prometeu-core/src/virtual_machine/bytecode/mod.rs new file mode 100644 index 00000000..56812db3 --- /dev/null +++ b/crates/prometeu-core/src/virtual_machine/bytecode/mod.rs @@ -0,0 +1 @@ +pub mod decoder; diff --git a/crates/prometeu-core/src/virtual_machine/call_frame.rs b/crates/prometeu-core/src/virtual_machine/call_frame.rs index afb68271..62a9fd73 100644 --- a/crates/prometeu-core/src/virtual_machine/call_frame.rs +++ b/crates/prometeu-core/src/virtual_machine/call_frame.rs @@ -1,4 +1,5 @@ pub struct CallFrame { pub return_pc: u32, pub stack_base: usize, + pub func_idx: usize, } \ No newline at end of file diff --git a/crates/prometeu-core/src/virtual_machine/mod.rs b/crates/prometeu-core/src/virtual_machine/mod.rs index 144cbcea..92e3d2d7 100644 --- a/crates/prometeu-core/src/virtual_machine/mod.rs +++ b/crates/prometeu-core/src/virtual_machine/mod.rs @@ -3,6 +3,9 @@ mod value; mod call_frame; mod scope_frame; mod program; +pub mod opcode_spec; +pub mod bytecode; +pub mod verifier; use crate::hardware::HardwareBridge; pub use program::Program; @@ -10,6 +13,7 @@ pub use prometeu_bytecode::opcode::OpCode; pub use value::Value; pub use virtual_machine::{BudgetReport, LogicalFrameEndingReason, VirtualMachine}; pub use prometeu_bytecode::abi::TrapInfo; +pub use verifier::VerifierError; pub type SyscallId = u32; @@ -26,6 +30,8 @@ pub enum VmInitError { PpbcParseFailed, PbsV0LoadFailed(prometeu_bytecode::v0::LoadError), EntrypointNotFound, + VerificationFailed(VerifierError), + UnsupportedLegacyCallEncoding, } pub struct HostReturn<'a> { diff --git a/crates/prometeu-core/src/virtual_machine/opcode_spec.rs b/crates/prometeu-core/src/virtual_machine/opcode_spec.rs new file mode 100644 index 00000000..6c42029c --- /dev/null +++ b/crates/prometeu-core/src/virtual_machine/opcode_spec.rs @@ -0,0 +1,81 @@ +use prometeu_bytecode::opcode::OpCode; + +/// Specification for a single OpCode. +/// All JMP/JMP_IF_* immediates are u32 absolute offsets from function start. +#[derive(Debug, Clone, Copy)] +pub struct OpcodeSpec { + pub name: &'static str, + pub imm_bytes: u8, // immediate payload size (decode) + pub pops: u16, // slots popped + pub pushes: u16, // slots pushed + pub is_branch: bool, // has a control-flow target + pub is_terminator: bool, // ends basic block: JMP/RET/TRAP/HALT + pub may_trap: bool, // runtime trap possible +} + +pub trait OpCodeSpecExt { + fn spec(&self) -> OpcodeSpec; +} + +impl OpCodeSpecExt for OpCode { + fn spec(&self) -> OpcodeSpec { + match self { + OpCode::Nop => OpcodeSpec { name: "NOP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Halt => OpcodeSpec { name: "HALT", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, + OpCode::Jmp => OpcodeSpec { name: "JMP", imm_bytes: 4, pops: 0, pushes: 0, is_branch: true, is_terminator: true, may_trap: false }, + OpCode::JmpIfFalse => OpcodeSpec { name: "JMP_IF_FALSE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: false }, + OpCode::JmpIfTrue => OpcodeSpec { name: "JMP_IF_TRUE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: false }, + OpCode::Trap => OpcodeSpec { name: "TRAP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: true }, + OpCode::PushConst => OpcodeSpec { name: "PUSH_CONST", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Pop => OpcodeSpec { name: "POP", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PopN => OpcodeSpec { name: "POP_N", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Dup => OpcodeSpec { name: "DUP", imm_bytes: 0, pops: 1, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Swap => OpcodeSpec { name: "SWAP", imm_bytes: 0, pops: 2, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushI64 => OpcodeSpec { name: "PUSH_I64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushF64 => OpcodeSpec { name: "PUSH_F64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushBool => OpcodeSpec { name: "PUSH_BOOL", imm_bytes: 1, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushI32 => OpcodeSpec { name: "PUSH_I32", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushBounded => OpcodeSpec { name: "PUSH_BOUNDED", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Add => OpcodeSpec { name: "ADD", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Sub => OpcodeSpec { name: "SUB", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Mul => OpcodeSpec { name: "MUL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Div => OpcodeSpec { name: "DIV", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Eq => OpcodeSpec { name: "EQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Neq => OpcodeSpec { name: "NEQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Lt => OpcodeSpec { name: "LT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Gt => OpcodeSpec { name: "GT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::And => OpcodeSpec { name: "AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Or => OpcodeSpec { name: "OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Not => OpcodeSpec { name: "NOT", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitAnd => OpcodeSpec { name: "BIT_AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitOr => OpcodeSpec { name: "BIT_OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitXor => OpcodeSpec { name: "BIT_XOR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Shl => OpcodeSpec { name: "SHL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Shr => OpcodeSpec { name: "SHR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Lte => OpcodeSpec { name: "LTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Gte => OpcodeSpec { name: "GTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Neg => OpcodeSpec { name: "NEG", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::GetGlobal => OpcodeSpec { name: "GET_GLOBAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::SetGlobal => OpcodeSpec { name: "SET_GLOBAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::GetLocal => OpcodeSpec { name: "GET_LOCAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::SetLocal => OpcodeSpec { name: "SET_LOCAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Call => OpcodeSpec { name: "CALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Ret => OpcodeSpec { name: "RET", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, + OpCode::PushScope => OpcodeSpec { name: "PUSH_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PopScope => OpcodeSpec { name: "POP_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Alloc => OpcodeSpec { name: "ALLOC", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateLoad => OpcodeSpec { name: "GATE_LOAD", imm_bytes: 4, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateStore => OpcodeSpec { name: "GATE_STORE", imm_bytes: 4, pops: 2, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginPeek => OpcodeSpec { name: "GATE_BEGIN_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndPeek => OpcodeSpec { name: "GATE_END_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginBorrow => OpcodeSpec { name: "GATE_BEGIN_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndBorrow => OpcodeSpec { name: "GATE_END_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginMutate => OpcodeSpec { name: "GATE_BEGIN_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndMutate => OpcodeSpec { name: "GATE_END_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateRetain => OpcodeSpec { name: "GATE_RETAIN", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateRelease => OpcodeSpec { name: "GATE_RELEASE", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Syscall => OpcodeSpec { name: "SYSCALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::FrameSync => OpcodeSpec { name: "FRAME_SYNC", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + } + } +} diff --git a/crates/prometeu-core/src/virtual_machine/program.rs b/crates/prometeu-core/src/virtual_machine/program.rs index e6a58592..c0982d74 100644 --- a/crates/prometeu-core/src/virtual_machine/program.rs +++ b/crates/prometeu-core/src/virtual_machine/program.rs @@ -1,17 +1,27 @@ use crate::virtual_machine::Value; +use prometeu_bytecode::v0::FunctionMeta; use std::sync::Arc; #[derive(Debug, Clone, Default)] pub struct Program { pub rom: Arc<[u8]>, pub constant_pool: Arc<[Value]>, + pub functions: Arc<[FunctionMeta]>, } impl Program { - pub fn new(rom: Vec, constant_pool: Vec) -> Self { + pub fn new(rom: Vec, constant_pool: Vec, mut functions: Vec) -> Self { + if functions.is_empty() && !rom.is_empty() { + functions.push(FunctionMeta { + code_offset: 0, + code_len: rom.len() as u32, + ..Default::default() + }); + } Self { rom: Arc::from(rom), constant_pool: Arc::from(constant_pool), + functions: Arc::from(functions), } } } diff --git a/crates/prometeu-core/src/virtual_machine/verifier.rs b/crates/prometeu-core/src/virtual_machine/verifier.rs new file mode 100644 index 00000000..ba63820d --- /dev/null +++ b/crates/prometeu-core/src/virtual_machine/verifier.rs @@ -0,0 +1,314 @@ +use prometeu_bytecode::v0::FunctionMeta; +use crate::virtual_machine::bytecode::decoder::{decode_at, DecodeError}; +use prometeu_bytecode::opcode::OpCode; +use std::collections::{HashMap, VecDeque, HashSet}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VerifierError { + UnknownOpcode { pc: usize, opcode: u16 }, + TruncatedOpcode { pc: usize }, + TruncatedImmediate { pc: usize, opcode: OpCode, need: usize, have: usize }, + InvalidJumpTarget { pc: usize, target: usize }, + JumpToMidInstruction { pc: usize, target: usize }, + StackUnderflow { pc: usize, opcode: OpCode }, + StackMismatchJoin { pc: usize, target: usize, height_in: u16, height_target: u16 }, + BadRetStackHeight { pc: usize, height: u16, expected: u16 }, + FunctionOutOfBounds { func_idx: usize, start: usize, end: usize, code_len: usize }, + InvalidSyscallId { pc: usize, id: u32 }, + TrailingBytes { func_idx: usize, at_pc: usize }, + InvalidFuncId { pc: usize, id: u32 }, +} + +pub struct Verifier; + +impl Verifier { + pub fn verify(code: &[u8], functions: &[FunctionMeta]) -> Result, VerifierError> { + let mut max_stacks = Vec::with_capacity(functions.len()); + for (i, func) in functions.iter().enumerate() { + max_stacks.push(Self::verify_function(code, func, i, functions)?); + } + Ok(max_stacks) + } + + fn verify_function(code: &[u8], func: &FunctionMeta, func_idx: usize, all_functions: &[FunctionMeta]) -> Result { + let func_start = func.code_offset as usize; + let func_end = func_start + func.code_len as usize; + + if func_start > code.len() || func_end > code.len() || func_start > func_end { + return Err(VerifierError::FunctionOutOfBounds { + func_idx, + start: func_start, + end: func_end, + code_len: code.len(), + }); + } + + let func_code = &code[func_start..func_end]; + + // First pass: find all valid instruction boundaries + let mut valid_pc = HashSet::new(); + let mut pc = 0; + while pc < func_code.len() { + valid_pc.insert(pc); + let instr = decode_at(func_code, pc).map_err(|e| match e { + DecodeError::UnknownOpcode { pc: _, opcode } => + VerifierError::UnknownOpcode { pc: func_start + pc, opcode }, + DecodeError::TruncatedOpcode { pc: _ } => + VerifierError::TruncatedOpcode { pc: func_start + pc }, + DecodeError::TruncatedImmediate { pc: _, opcode, need, have } => + VerifierError::TruncatedImmediate { pc: func_start + pc, opcode, need, have }, + })?; + pc = instr.next_pc; + } + + if pc != func_code.len() { + return Err(VerifierError::TrailingBytes { func_idx, at_pc: func_start + pc }); + } + + let mut stack_height_in: HashMap = HashMap::new(); + let mut worklist = VecDeque::new(); + let mut max_stack: u16 = 0; + + // Start from function entry + stack_height_in.insert(0, 0); + worklist.push_back(0); + + while let Some(pc) = worklist.pop_front() { + let in_height = *stack_height_in.get(&pc).unwrap(); + let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass + let spec = instr.spec; + + // Resolve dynamic pops/pushes + let (pops, pushes) = match instr.opcode { + OpCode::PopN => { + let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as u16; + (n, 0) + } + OpCode::Call => { + let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let callee = all_functions.get(func_id as usize).ok_or_else(|| { + VerifierError::InvalidFuncId { pc: func_start + pc, id: func_id } + })?; + (callee.param_slots, callee.return_slots) + } + OpCode::Ret => { + (func.return_slots, 0) + } + OpCode::Syscall => { + let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let syscall = crate::hardware::syscalls::Syscall::from_u32(id).ok_or_else(|| { + VerifierError::InvalidSyscallId { pc: func_start + pc, id } + })?; + (syscall.args_count() as u16, syscall.results_count() as u16) + } + _ => (spec.pops, spec.pushes), + }; + + if in_height < pops { + return Err(VerifierError::StackUnderflow { pc: func_start + pc, opcode: instr.opcode }); + } + + let out_height = in_height - pops + pushes; + max_stack = max_stack.max(out_height); + + if instr.opcode == OpCode::Ret { + if in_height != func.return_slots { + return Err(VerifierError::BadRetStackHeight { pc: func_start + pc, height: in_height, expected: func.return_slots }); + } + } + + // Propagate to successors + if spec.is_branch { + let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + + if target >= func.code_len as usize { + return Err(VerifierError::InvalidJumpTarget { pc: func_start + pc, target: func_start + target }); + } + if !valid_pc.contains(&target) { + return Err(VerifierError::JumpToMidInstruction { pc: func_start + pc, target: func_start + target }); + } + + if let Some(&existing_height) = stack_height_in.get(&target) { + if existing_height != out_height { + return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + target, height_in: out_height, height_target: existing_height }); + } + } else { + stack_height_in.insert(target, out_height); + worklist.push_back(target); + } + } + + if !spec.is_terminator { + let next_pc = instr.next_pc; + if next_pc < func.code_len as usize { + if let Some(&existing_height) = stack_height_in.get(&next_pc) { + if existing_height != out_height { + return Err(VerifierError::StackMismatchJoin { pc: func_start + pc, target: func_start + next_pc, height_in: out_height, height_target: existing_height }); + } + } else { + stack_height_in.insert(next_pc, out_height); + worklist.push_back(next_pc); + } + } + } + } + + Ok(max_stack) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_verifier_underflow() { + // OpCode::Add (2 bytes) + let code = vec![OpCode::Add as u8, 0x00]; + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 2, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Add })); + } + + #[test] + fn test_verifier_dup_underflow() { + let code = vec![(OpCode::Dup as u16).to_le_bytes()[0], (OpCode::Dup as u16).to_le_bytes()[1]]; + let functions = vec![FunctionMeta { code_offset: 0, code_len: 2, ..Default::default() }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::StackUnderflow { pc: 0, opcode: OpCode::Dup })); + } + + #[test] + fn test_verifier_invalid_jmp_target() { + // Jmp (2 bytes) + 100u32 (4 bytes) + let mut code = vec![OpCode::Jmp as u8, 0x00]; + code.extend_from_slice(&100u32.to_le_bytes()); + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 6, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::InvalidJumpTarget { pc: 0, target: 100 })); + } + + #[test] + fn test_verifier_jmp_to_mid_instr() { + // PushI32 (2 bytes) + 42u32 (4 bytes) + // Jmp 1 (middle of PushI32) + let mut code = vec![OpCode::PushI32 as u8, 0x00]; + code.extend_from_slice(&42u32.to_le_bytes()); + code.push(OpCode::Jmp as u8); + code.push(0x00); + code.extend_from_slice(&1u32.to_le_bytes()); + + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 12, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::JumpToMidInstruction { pc: 6, target: 1 })); + } + + #[test] + fn test_verifier_truncation_opcode() { + let code = vec![OpCode::PushI32 as u8]; // Truncated u16 opcode + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 1, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::TruncatedOpcode { pc: 0 })); + } + + #[test] + fn test_verifier_truncation_immediate() { + let mut code = vec![OpCode::PushI32 as u8, 0x00]; + code.push(0x42); // Only 1 byte of 4-byte immediate + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 3, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::TruncatedImmediate { pc: 0, opcode: OpCode::PushI32, need: 4, have: 1 })); + } + + #[test] + fn test_verifier_stack_mismatch_join() { + // Let's make it reachable: + // 0: PushBool true + // 3: JmpIfTrue 15 + // 9: Jmp 27 + // 15: PushI32 1 + // 21: Jmp 27 + // 27: Nop + + let mut code = Vec::new(); + code.push(OpCode::PushBool as u8); code.push(0x00); code.push(1); // 0: PushBool (3 bytes) + code.push(OpCode::JmpIfTrue as u8); code.push(0x00); code.extend_from_slice(&15u32.to_le_bytes()); // 3: JmpIfTrue (6 bytes) + code.push(OpCode::Jmp as u8); code.push(0x00); code.extend_from_slice(&27u32.to_le_bytes()); // 9: Jmp (6 bytes) + code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&1u32.to_le_bytes()); // 15: PushI32 (6 bytes) + code.push(OpCode::Jmp as u8); code.push(0x00); code.extend_from_slice(&27u32.to_le_bytes()); // 21: Jmp (6 bytes) + code.push(OpCode::Nop as u8); code.push(0x00); // 27: Nop (2 bytes) + + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 29, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + // Path 0->3->9->27: height 1-1+0 = 0. + // Path 0->3->15->21->27: height 1-1+1 = 1. + // Mismatch at 27: 0 vs 1. + + assert_eq!(res, Err(VerifierError::StackMismatchJoin { pc: 21, target: 27, height_in: 1, height_target: 0 })); + } + + #[test] + fn test_verifier_bad_ret_height() { + // PushI32 1 (6 bytes) + // Ret (2 bytes) + let mut code = vec![OpCode::PushI32 as u8, 0x00]; + code.extend_from_slice(&1u32.to_le_bytes()); + code.push(OpCode::Ret as u8); + code.push(0x00); + + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 8, + return_slots: 0, // Expected 0, but got 1 + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions); + assert_eq!(res, Err(VerifierError::BadRetStackHeight { pc: 6, height: 1, expected: 0 })); + } + + #[test] + fn test_verifier_max_stack() { + // PushI32 1 + // PushI32 2 + // Add + // Ret + let mut code = Vec::new(); + code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&1u32.to_le_bytes()); + code.push(OpCode::PushI32 as u8); code.push(0x00); code.extend_from_slice(&2u32.to_le_bytes()); + code.push(OpCode::Add as u8); code.push(0x00); + code.push(OpCode::Ret as u8); code.push(0x00); + + let functions = vec![FunctionMeta { + code_offset: 0, + code_len: 16, + return_slots: 1, + ..Default::default() + }]; + let res = Verifier::verify(&code, &functions).unwrap(); + assert_eq!(res[0], 2); + } +} diff --git a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs index 80ee1332..73b61911 100644 --- a/crates/prometeu-core/src/virtual_machine/virtual_machine.rs +++ b/crates/prometeu-core/src/virtual_machine/virtual_machine.rs @@ -88,7 +88,7 @@ impl VirtualMachine { call_stack: Vec::new(), scope_stack: Vec::new(), globals: Vec::new(), - program: Program::new(rom, constant_pool), + program: Program::new(rom, constant_pool, vec![]), heap: Vec::new(), cycles: 0, halted: false, @@ -115,6 +115,10 @@ impl VirtualMachine { let program = if program_bytes.starts_with(b"PPBC") { // PBC (Prometeu ByteCode) legacy format let pbc_file = pbc::parse_pbc(&program_bytes).map_err(|_| VmInitError::PpbcParseFailed)?; + + // Policy (A): Reject legacy CALL encoding in legacy formats. + Self::legacy_reject_call_encoding(&pbc_file.rom)?; + let cp = pbc_file.cp.into_iter().map(|entry| match entry { ConstantPoolEntry::Int32(v) => Value::Int32(v), ConstantPoolEntry::Int64(v) => Value::Int64(v), @@ -123,11 +127,20 @@ impl VirtualMachine { ConstantPoolEntry::String(v) => Value::String(v), ConstantPoolEntry::Null => Value::Null, }).collect(); - Program::new(pbc_file.rom, cp) + Program::new(pbc_file.rom, cp, vec![]) } else if program_bytes.starts_with(b"PBS\0") { // PBS v0 industrial format match prometeu_bytecode::v0::BytecodeLoader::load(&program_bytes) { - Ok(module) => { + Ok(mut module) => { + // Run verifier + let max_stacks = crate::virtual_machine::verifier::Verifier::verify(&module.code, &module.functions) + .map_err(VmInitError::VerificationFailed)?; + + // Apply verified max_stack_slots + for (func, max_stack) in module.functions.iter_mut().zip(max_stacks) { + func.max_stack_slots = max_stack; + } + let cp = module.const_pool.into_iter().map(|entry| match entry { ConstantPoolEntry::Int32(v) => Value::Int32(v), ConstantPoolEntry::Int64(v) => Value::Int64(v), @@ -136,7 +149,7 @@ impl VirtualMachine { ConstantPoolEntry::String(v) => Value::String(v), ConstantPoolEntry::Null => Value::Null, }).collect(); - Program::new(module.code, cp) + Program::new(module.code, cp, module.functions) } Err(prometeu_bytecode::v0::LoadError::InvalidVersion) => return Err(VmInitError::UnsupportedFormat), Err(e) => { @@ -175,6 +188,10 @@ impl VirtualMachine { 0 }; + let func_idx = self.program.functions.iter().position(|f| { + addr >= f.code_offset as usize && addr < (f.code_offset + f.code_len) as usize + }).unwrap_or(0); + self.pc = addr; self.halted = false; @@ -187,8 +204,22 @@ impl VirtualMachine { self.call_stack.push(CallFrame { return_pc: self.program.rom.len() as u32, stack_base: 0, + func_idx, }); } + + fn legacy_reject_call_encoding(rom: &[u8]) -> Result<(), VmInitError> { + let mut pc = 0usize; + while pc < rom.len() { + let instr = crate::virtual_machine::bytecode::decoder::decode_at(rom, pc) + .map_err(|_| VmInitError::PpbcParseFailed)?; + if instr.opcode == OpCode::Call { + return Err(VmInitError::UnsupportedLegacyCallEncoding); + } + pc = instr.next_pc; + } + Ok(()) + } } impl Default for VirtualMachine { @@ -313,8 +344,11 @@ impl VirtualMachine { let start_pc = self.pc; // Fetch & Decode - let opcode_val = self.read_u16().map_err(|e| LogicalFrameEndingReason::Panic(e))?; - let opcode = OpCode::try_from(opcode_val).map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let instr = crate::virtual_machine::bytecode::decoder::decode_at(&self.program.rom, self.pc) + .map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; + + let opcode = instr.opcode; + self.pc = instr.next_pc; // Execute match opcode { @@ -323,42 +357,44 @@ impl VirtualMachine { self.halted = true; } OpCode::Jmp => { - let addr = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; - self.pc = addr; + let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let func_start = self.call_stack.last().map(|f| self.program.functions[f.func_idx].code_offset as usize).unwrap_or(0); + self.pc = func_start + target; } OpCode::JmpIfFalse => { - let addr = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Boolean(false) = val { - self.pc = addr; + let func_start = self.call_stack.last().map(|f| self.program.functions[f.func_idx].code_offset as usize).unwrap_or(0); + self.pc = func_start + target; } } OpCode::JmpIfTrue => { - let addr = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let target = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Boolean(true) = val { - self.pc = addr; + let func_start = self.call_stack.last().map(|f| self.program.functions[f.func_idx].code_offset as usize).unwrap_or(0); + self.pc = func_start + target; } } OpCode::Trap => { - // Handled in run_budget for interruption, - // but we need to advance PC if executed via step() directly. + // Handled in run_budget for interruption } OpCode::PushConst => { - let idx = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.program.constant_pool.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid constant index".into()))?; self.push(val); } OpCode::PushI64 => { - let val = self.read_i64().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let val = i64::from_le_bytes(instr.imm[0..8].try_into().unwrap()); self.push(Value::Int64(val)); } OpCode::PushI32 => { - let val = self.read_i32().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let val = i32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); self.push(Value::Int32(val)); } OpCode::PushBounded => { - let val = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let val = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); if val > 0xFFFF { return Err(LogicalFrameEndingReason::Trap(TrapInfo { code: TRAP_OOB, @@ -370,18 +406,18 @@ impl VirtualMachine { self.push(Value::Bounded(val)); } OpCode::PushF64 => { - let val = self.read_f64().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let val = f64::from_le_bytes(instr.imm[0..8].try_into().unwrap()); self.push(Value::Float(val)); } OpCode::PushBool => { - let val = self.read_u8().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let val = instr.imm[0]; self.push(Value::Boolean(val != 0)); } OpCode::Pop => { self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; } OpCode::PopN => { - let n = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let n = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); for _ in 0..n { self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; } @@ -557,12 +593,12 @@ impl VirtualMachine { } } OpCode::GetGlobal => { - let idx = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.globals.get(idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid global index".into()))?; self.push(val); } OpCode::SetGlobal => { - let idx = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if idx >= self.globals.len() { self.globals.resize(idx + 1, Value::Null); @@ -570,13 +606,13 @@ impl VirtualMachine { self.globals[idx] = val; } OpCode::GetLocal => { - let idx = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let val = self.operand_stack.get(frame.stack_base + idx).cloned().ok_or_else(|| LogicalFrameEndingReason::Panic("Invalid local index".into()))?; self.push(val); } OpCode::SetLocal => { - let idx = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let idx = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let frame = self.call_stack.last().ok_or_else(|| LogicalFrameEndingReason::Panic("No active call frame".into()))?; let stack_idx = frame.stack_base + idx; @@ -586,20 +622,33 @@ impl VirtualMachine { self.operand_stack[stack_idx] = val; } OpCode::Call => { - let addr = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; - let args_count = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; - let stack_base = self.operand_stack.len() - args_count; + let func_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; + let callee = self.program.functions.get(func_id).ok_or_else(|| LogicalFrameEndingReason::Panic(format!("Invalid func_id {}", func_id)))?; + + let stack_base = self.operand_stack.len() - callee.param_slots as usize; self.call_stack.push(CallFrame { return_pc: self.pc as u32, stack_base, + func_idx: func_id, }); - self.pc = addr; + self.pc = callee.code_offset as usize; } OpCode::Ret => { let frame = self.call_stack.pop().ok_or_else(|| LogicalFrameEndingReason::Panic("Call stack underflow".into()))?; - let return_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let func = &self.program.functions[frame.func_idx]; + let return_slots = func.return_slots as usize; + + // Copy return values + let mut return_vals = Vec::with_capacity(return_slots); + for _ in 0..return_slots { + return_vals.push(self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?); + } + return_vals.reverse(); + self.operand_stack.truncate(frame.stack_base); - self.push(return_val); + for val in return_vals { + self.push(val); + } self.pc = frame.return_pc as usize; } OpCode::PushScope => { @@ -612,8 +661,8 @@ impl VirtualMachine { self.operand_stack.truncate(frame.scope_stack_base); } OpCode::Alloc => { - let _type_id = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))?; - let slots = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let _type_id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); + let slots = u32::from_le_bytes(instr.imm[4..8].try_into().unwrap()) as usize; let ref_idx = self.heap.len(); for _ in 0..slots { self.heap.push(Value::Null); @@ -621,7 +670,7 @@ impl VirtualMachine { self.push(Value::Gate(ref_idx)); } OpCode::GateLoad => { - let offset = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(base) = ref_val { let val = self.heap.get(base + offset).cloned().ok_or_else(|| { @@ -643,7 +692,7 @@ impl VirtualMachine { } } OpCode::GateStore => { - let offset = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))? as usize; + let offset = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()) as usize; let val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; let ref_val = self.pop().map_err(|e| LogicalFrameEndingReason::Panic(e))?; if let Value::Gate(base) = ref_val { @@ -676,7 +725,7 @@ impl VirtualMachine { OpCode::Syscall => { let pc_at_syscall = start_pc as u32; - let id = self.read_u32().map_err(|e| LogicalFrameEndingReason::Panic(e))?; + let id = u32::from_le_bytes(instr.imm[0..4].try_into().unwrap()); let syscall = crate::hardware::syscalls::Syscall::from_u32(id).ok_or_else(|| { LogicalFrameEndingReason::Trap(TrapInfo { @@ -724,75 +773,6 @@ impl VirtualMachine { Ok(()) } - fn read_u32(&mut self) -> Result { - if self.pc + 4 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let bytes = [ - self.program.rom[self.pc], - self.program.rom[self.pc + 1], - self.program.rom[self.pc + 2], - self.program.rom[self.pc + 3], - ]; - self.pc += 4; - Ok(u32::from_le_bytes(bytes)) - } - - fn read_i32(&mut self) -> Result { - if self.pc + 4 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let bytes = [ - self.program.rom[self.pc], - self.program.rom[self.pc + 1], - self.program.rom[self.pc + 2], - self.program.rom[self.pc + 3], - ]; - self.pc += 4; - Ok(i32::from_le_bytes(bytes)) - } - - fn read_i64(&mut self) -> Result { - if self.pc + 8 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let mut bytes = [0u8; 8]; - bytes.copy_from_slice(&self.program.rom[self.pc..self.pc + 8]); - self.pc += 8; - Ok(i64::from_le_bytes(bytes)) - } - - fn read_f64(&mut self) -> Result { - if self.pc + 8 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let mut bytes = [0u8; 8]; - bytes.copy_from_slice(&self.program.rom[self.pc..self.pc + 8]); - self.pc += 8; - Ok(f64::from_le_bytes(bytes)) - } - - fn read_u16(&mut self) -> Result { - if self.pc + 2 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let bytes = [ - self.program.rom[self.pc], - self.program.rom[self.pc + 1], - ]; - self.pc += 2; - Ok(u16::from_le_bytes(bytes)) - } - - fn read_u8(&mut self) -> Result { - if self.pc + 1 > self.program.rom.len() { - return Err("Unexpected end of ROM".into()); - } - let val = self.program.rom[self.pc]; - self.pc += 1; - Ok(val) - } - pub fn push(&mut self, val: Value) { self.operand_stack.push(val); } @@ -833,6 +813,7 @@ impl VirtualMachine { #[cfg(test)] mod tests { use super::*; + use prometeu_bytecode::v0::FunctionMeta; use crate::hardware::HardwareBridge; use crate::virtual_machine::{Value, HostReturn, VmFault, expect_int}; @@ -930,19 +911,15 @@ mod tests { // entrypoint: // PUSH_I64 10 - // CALL func_addr, 1 (args_count = 1) + // CALL func_id 1 // HALT - let func_addr = 2 + 8 + 2 + 4 + 4 + 2; // PUSH_I64(2+8) + CALL(2+4+4) + HALT(2) - rom.extend_from_slice(&(OpCode::PushI64 as u16).to_le_bytes()); rom.extend_from_slice(&10i64.to_le_bytes()); rom.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); - rom.extend_from_slice(&(func_addr as u32).to_le_bytes()); - rom.extend_from_slice(&1u32.to_le_bytes()); // 1 arg + rom.extend_from_slice(&1u32.to_le_bytes()); // func_id 1 rom.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); - // Ensure the current PC is exactly at func_addr - assert_eq!(rom.len(), func_addr); + let func_addr = rom.len(); // func: // PUSH_SCOPE @@ -966,7 +943,22 @@ mod tests { rom.extend_from_slice(&0u32.to_le_bytes()); rom.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); - let mut vm = VirtualMachine::new(rom, vec![]); + let functions = vec![ + FunctionMeta { code_offset: 0, code_len: func_addr as u32, ..Default::default() }, + FunctionMeta { + code_offset: func_addr as u32, + code_len: (rom.len() - func_addr) as u32, + param_slots: 1, + return_slots: 1, + ..Default::default() + }, + ]; + + let mut vm = VirtualMachine { + program: Program::new(rom, vec![], functions), + ..Default::default() + }; + vm.prepare_call("0"); let mut native = MockNative; let mut hw = MockHardware; @@ -980,24 +972,38 @@ mod tests { assert!(vm.halted); assert_eq!(vm.pop_integer().unwrap(), 30); assert_eq!(vm.operand_stack.len(), 0); - assert_eq!(vm.call_stack.len(), 0); + assert_eq!(vm.call_stack.len(), 1); assert_eq!(vm.scope_stack.len(), 0); } #[test] fn test_ret_mandatory_value() { let mut rom = Vec::new(); - // entrypoint: CALL func, 0; HALT - let func_addr = (2 + 4 + 4) + 2; + // entrypoint: CALL func_id 1; HALT rom.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); - rom.extend_from_slice(&(func_addr as u32).to_le_bytes()); - rom.extend_from_slice(&0u32.to_le_bytes()); // 0 args + rom.extend_from_slice(&1u32.to_le_bytes()); // func_id 1 rom.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); + let func_addr = rom.len(); // func: RET (SEM VALOR ANTES) rom.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); - let mut vm = VirtualMachine::new(rom, vec![]); + let functions = vec![ + FunctionMeta { code_offset: 0, code_len: func_addr as u32, ..Default::default() }, + FunctionMeta { + code_offset: func_addr as u32, + code_len: (rom.len() - func_addr) as u32, + param_slots: 0, + return_slots: 1, + ..Default::default() + }, + ]; + + let mut vm = VirtualMachine { + program: Program::new(rom, vec![], functions), + ..Default::default() + }; + vm.prepare_call("0"); let mut native = MockNative; let mut hw = MockHardware; @@ -1012,14 +1018,29 @@ mod tests { // Agora com valor de retorno let mut rom2 = Vec::new(); rom2.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); - rom2.extend_from_slice(&(func_addr as u32).to_le_bytes()); - rom2.extend_from_slice(&0u32.to_le_bytes()); + rom2.extend_from_slice(&1u32.to_le_bytes()); rom2.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); + let func_addr2 = rom2.len(); rom2.extend_from_slice(&(OpCode::PushI64 as u16).to_le_bytes()); rom2.extend_from_slice(&123i64.to_le_bytes()); rom2.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); - let mut vm2 = VirtualMachine::new(rom2, vec![]); + let functions2 = vec![ + FunctionMeta { code_offset: 0, code_len: func_addr2 as u32, ..Default::default() }, + FunctionMeta { + code_offset: func_addr2 as u32, + code_len: (rom2.len() - func_addr2) as u32, + param_slots: 0, + return_slots: 1, + ..Default::default() + }, + ]; + + let mut vm2 = VirtualMachine { + program: Program::new(rom2, vec![], functions2), + ..Default::default() + }; + vm2.prepare_call("0"); vm2.step(&mut native, &mut hw).unwrap(); // CALL vm2.step(&mut native, &mut hw).unwrap(); // PUSH_I64 vm2.step(&mut native, &mut hw).unwrap(); // RET @@ -1090,28 +1111,20 @@ mod tests { let mut rom = Vec::new(); // PUSH_I64 100 - // CALL func_addr, 0 + // CALL func_id 1 // HALT - let func_addr = 2 + 8 + 2 + 4 + 4 + 2; - rom.extend_from_slice(&(OpCode::PushI64 as u16).to_le_bytes()); rom.extend_from_slice(&100i64.to_le_bytes()); rom.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); - rom.extend_from_slice(&(func_addr as u32).to_le_bytes()); - rom.extend_from_slice(&0u32.to_le_bytes()); // 0 args + rom.extend_from_slice(&1u32.to_le_bytes()); // func_id 1 rom.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); + let func_addr = rom.len(); // func: // PUSH_I64 200 // PUSH_SCOPE // PUSH_I64 300 - // RET <-- Error! RET called with open scope. - // Wait, the requirement says "Ret ignores closed scopes", - // but if we have an OPEN scope, what should happen? - // The PR objective says "Ret destroys the call frame current... does not mess in intermediate scopes (they must have already been closed)" - // This means the COMPILER is responsible for closing them. - // If the compiler doesn't, the operand stack might be dirty. - // Let's test if RET works even with a scope open, and if it cleans up correctly. + // RET rom.extend_from_slice(&(OpCode::PushI64 as u16).to_le_bytes()); rom.extend_from_slice(&200i64.to_le_bytes()); @@ -1120,7 +1133,22 @@ mod tests { rom.extend_from_slice(&300i64.to_le_bytes()); rom.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); - let mut vm = VirtualMachine::new(rom, vec![]); + let functions = vec![ + FunctionMeta { code_offset: 0, code_len: func_addr as u32, ..Default::default() }, + FunctionMeta { + code_offset: func_addr as u32, + code_len: (rom.len() - func_addr) as u32, + param_slots: 0, + return_slots: 1, + ..Default::default() + }, + ]; + + let mut vm = VirtualMachine { + program: Program::new(rom, vec![], functions), + ..Default::default() + }; + vm.prepare_call("0"); let mut native = MockNative; let mut hw = MockHardware; @@ -1131,18 +1159,9 @@ mod tests { } assert!(vm.halted); - // RET will pop 300 as return value. - // It will truncate operand_stack to call_frame.stack_base (which was 1, after the first PUSH_I64 100). - // Then it pushes return value (300). - // So the stack should have [100, 300]. assert_eq!(vm.operand_stack.len(), 2); assert_eq!(vm.operand_stack[0], Value::Int64(100)); assert_eq!(vm.operand_stack[1], Value::Int64(300)); - - // Check if scope_stack was leaked (it currently would be if we don't clear it on RET) - // The PR doesn't explicitly say RET should clear scope_stack, but it's good practice. - // "Don't touch intermediate scopes (they should have already been closed)" - // If they were closed, scope_stack would be empty for this frame. } #[test] @@ -1615,4 +1634,52 @@ mod tests { assert_eq!(vm.program.rom.len(), 0); assert_eq!(vm.cycles, 0); } + + #[test] + fn test_policy_a_reject_legacy_call() { + let mut vm = VirtualMachine::default(); + + // PBC Header (PPBC) + let mut pbc = b"PPBC".to_vec(); + pbc.extend_from_slice(&0u16.to_le_bytes()); // Version + pbc.extend_from_slice(&0u16.to_le_bytes()); // Flags + pbc.extend_from_slice(&0u32.to_le_bytes()); // CP count + pbc.extend_from_slice(&4u32.to_le_bytes()); // ROM size + + // ROM: CALL (2 bytes) + 4-byte immediate (from OpcodeSpec) + // Wait, OpcodeSpec says CALL imm_bytes is 4. + pbc.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); + pbc.extend_from_slice(&[0, 0, 0, 0]); + // Update ROM size to 6 + pbc[12..16].copy_from_slice(&6u32.to_le_bytes()); + + let res = vm.initialize(pbc, ""); + assert_eq!(res, Err(VmInitError::UnsupportedLegacyCallEncoding)); + } + + #[test] + fn test_policy_a_permit_call_pattern_in_immediate() { + let mut vm = VirtualMachine::default(); + + // PBC Header (PPBC) + let mut pbc = b"PPBC".to_vec(); + pbc.extend_from_slice(&0u16.to_le_bytes()); // Version + pbc.extend_from_slice(&0u16.to_le_bytes()); // Flags + pbc.extend_from_slice(&0u32.to_le_bytes()); // CP count + + // ROM: PUSH_I64 with a value that contains OpCode::Call bytes + let mut rom = Vec::new(); + rom.extend_from_slice(&(OpCode::PushI64 as u16).to_le_bytes()); + let call_val = OpCode::Call as u16; + let mut val_bytes = [0u8; 8]; + val_bytes[0..2].copy_from_slice(&call_val.to_le_bytes()); + rom.extend_from_slice(&val_bytes); + rom.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); + + pbc.extend_from_slice(&(rom.len() as u32).to_le_bytes()); // ROM size + pbc.extend_from_slice(&rom); + + let res = vm.initialize(pbc, ""); + assert!(res.is_ok(), "Should NOT fail if Call pattern is in immediate: {:?}", res); + } } diff --git a/docs/specs/pbs/files/PRs para Junie.md b/docs/specs/pbs/files/PRs para Junie.md index 9a9661a4..232f8b4a 100644 --- a/docs/specs/pbs/files/PRs para Junie.md +++ b/docs/specs/pbs/files/PRs para Junie.md @@ -1,45 +1,3 @@ -## PR-03 — Frame model v0: locals, operand stack, and function metadata - -**Why:** `let x: int = 1` failing usually means locals/frames are not modeled correctly. - -### Scope - -* Define `FunctionMeta`: - - * `code_offset`, `code_len` - * `param_slots`, `local_slots`, `return_slots` - * `max_stack_slots` (computed by verifier or compiler) -* Define `Frame`: - - * `base` (stack base index) - * `locals_base` (or equivalent) - * `return_slots` - * `pc_return` -* Decide representation: - - * Option A (recommended v0): **single VM stack** with fixed layout per frame: - - * `[args][locals][operand_stack...]` - * Use `base + local_index` addressing. - -### Deliverables - -* `CallStack` with `Vec` -* `enter_frame(meta)` allocates locals area (zero-init) -* `leave_frame()` reclaims to previous base - -### Tests - -* locals are isolated per call -* locals are zero-initialized -* stack is restored exactly after return - -### Acceptance - -* Locals are deterministic and independent from operand stack usage. - ---- - ## PR-04 — Locals opcodes: GET_LOCAL / SET_LOCAL / INIT_LOCAL **Why:** PBS `let` and parameters need first-class support.