From 5941cd724871ad60871aef21495d0d0b9704282e Mon Sep 17 00:00:00 2001 From: bQUARKz Date: Mon, 9 Feb 2026 23:17:51 +0000 Subject: [PATCH] pr 01 --- crates/prometeu-bytecode/src/abi.rs | 21 +--- crates/prometeu-bytecode/src/lib.rs | 1 + crates/prometeu-bytecode/src/opcode_spec.rs | 115 ++++++++++++++++++ .../src/backend/emit_bytecode.rs | 28 +++-- .../prometeu-compiler/src/building/linker.rs | 75 ++++++++---- .../src/building/orchestrator.rs | 23 +--- crates/prometeu-vm/src/bytecode/decoder.rs | 2 +- crates/prometeu-vm/src/opcode_spec.rs | 87 +------------ test-cartridges/canonical/golden/program.pbc | Bin 578 -> 629 bytes test-cartridges/canonical/prometeu.json | 3 +- 10 files changed, 194 insertions(+), 161 deletions(-) create mode 100644 crates/prometeu-bytecode/src/opcode_spec.rs diff --git a/crates/prometeu-bytecode/src/abi.rs b/crates/prometeu-bytecode/src/abi.rs index 6d4d54ac..b34fecb6 100644 --- a/crates/prometeu-bytecode/src/abi.rs +++ b/crates/prometeu-bytecode/src/abi.rs @@ -2,29 +2,14 @@ //! It specifies how instructions are encoded in bytes and how they interact with memory. use crate::opcode::OpCode; +use crate::opcode_spec::OpCodeSpecExt; /// Returns the size in bytes of the operands for a given OpCode. /// /// Note: This does NOT include the 2 bytes of the OpCode itself. /// For example, `PushI32` has a size of 4, but occupies 6 bytes in ROM (2 for OpCode + 4 for value). pub fn operand_size(opcode: OpCode) -> usize { - match opcode { - OpCode::PushConst => 4, - OpCode::PushI32 => 4, - OpCode::PushBounded => 4, - OpCode::PushI64 => 8, - OpCode::PushF64 => 8, - OpCode::PushBool => 1, - OpCode::PopN => 4, - OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => 4, - OpCode::GetGlobal | OpCode::SetGlobal => 4, - OpCode::GetLocal | OpCode::SetLocal => 4, - OpCode::Call => 4, // func_id(u32) - OpCode::Syscall => 4, - OpCode::Alloc => 8, // type_id(u32) + slots(u32) - OpCode::GateLoad | OpCode::GateStore => 4, // offset(u32) - _ => 0, - } + opcode.spec().imm_bytes as usize } // --- HIP Trap Codes --- @@ -85,7 +70,7 @@ pub fn is_jump(opcode: OpCode) -> bool { /// Checks if an instruction has any immediate operands in the instruction stream. pub fn has_immediate(opcode: OpCode) -> bool { - operand_size(opcode) > 0 + opcode.spec().imm_bytes > 0 } #[cfg(test)] diff --git a/crates/prometeu-bytecode/src/lib.rs b/crates/prometeu-bytecode/src/lib.rs index 0d5440cd..ac6a5d7d 100644 --- a/crates/prometeu-bytecode/src/lib.rs +++ b/crates/prometeu-bytecode/src/lib.rs @@ -14,6 +14,7 @@ //! - [`readwrite`]: Internal utilities for Little-Endian binary I/O. pub mod opcode; +pub mod opcode_spec; pub mod abi; pub mod readwrite; pub mod asm; diff --git a/crates/prometeu-bytecode/src/opcode_spec.rs b/crates/prometeu-bytecode/src/opcode_spec.rs new file mode 100644 index 00000000..5f302c7d --- /dev/null +++ b/crates/prometeu-bytecode/src/opcode_spec.rs @@ -0,0 +1,115 @@ +use crate::opcode::OpCode; + +/// Specification for a single OpCode. +/// All JMP/JMP_IF_* immediate are u32 absolute offsets from function start. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OpcodeSpec { + pub name: &'static str, + pub imm_bytes: u8, // immediate payload size (decode) + pub pops: u16, // slots popped + pub pushes: u16, // slots pushed + pub is_branch: bool, // has a control-flow target + pub is_terminator: bool, // ends basic block: JMP/RET/TRAP/HALT + pub may_trap: bool, // runtime trap possible +} + +pub trait OpCodeSpecExt { + fn spec(&self) -> OpcodeSpec; +} + +impl OpCodeSpecExt for OpCode { + fn spec(&self) -> OpcodeSpec { + match self { + OpCode::Nop => OpcodeSpec { name: "NOP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Halt => OpcodeSpec { name: "HALT", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, + OpCode::Jmp => OpcodeSpec { name: "JMP", imm_bytes: 4, pops: 0, pushes: 0, is_branch: true, is_terminator: true, may_trap: false }, + OpCode::JmpIfFalse => OpcodeSpec { name: "JMP_IF_FALSE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: true }, + OpCode::JmpIfTrue => OpcodeSpec { name: "JMP_IF_TRUE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: true }, + OpCode::Trap => OpcodeSpec { name: "TRAP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: true }, + OpCode::PushConst => OpcodeSpec { name: "PUSH_CONST", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Pop => OpcodeSpec { name: "POP", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PopN => OpcodeSpec { name: "POP_N", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Dup => OpcodeSpec { name: "DUP", imm_bytes: 0, pops: 1, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Swap => OpcodeSpec { name: "SWAP", imm_bytes: 0, pops: 2, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushI64 => OpcodeSpec { name: "PUSH_I64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushF64 => OpcodeSpec { name: "PUSH_F64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushBool => OpcodeSpec { name: "PUSH_BOOL", imm_bytes: 1, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushI32 => OpcodeSpec { name: "PUSH_I32", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PushBounded => OpcodeSpec { name: "PUSH_BOUNDED", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Add => OpcodeSpec { name: "ADD", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Sub => OpcodeSpec { name: "SUB", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Mul => OpcodeSpec { name: "MUL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Div => OpcodeSpec { name: "DIV", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Mod => OpcodeSpec { name: "MOD", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::BoundToInt => OpcodeSpec { name: "BOUND_TO_INT", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::IntToBoundChecked => OpcodeSpec { name: "INT_TO_BOUND_CHECKED", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Eq => OpcodeSpec { name: "EQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Neq => OpcodeSpec { name: "NEQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Lt => OpcodeSpec { name: "LT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Gt => OpcodeSpec { name: "GT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::And => OpcodeSpec { name: "AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Or => OpcodeSpec { name: "OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Not => OpcodeSpec { name: "NOT", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitAnd => OpcodeSpec { name: "BIT_AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitOr => OpcodeSpec { name: "BIT_OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::BitXor => OpcodeSpec { name: "BIT_XOR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Shl => OpcodeSpec { name: "SHL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Shr => OpcodeSpec { name: "SHR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Lte => OpcodeSpec { name: "LTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Gte => OpcodeSpec { name: "GTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Neg => OpcodeSpec { name: "NEG", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::GetGlobal => OpcodeSpec { name: "GET_GLOBAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::SetGlobal => OpcodeSpec { name: "SET_GLOBAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::GetLocal => OpcodeSpec { name: "GET_LOCAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::SetLocal => OpcodeSpec { name: "SET_LOCAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Call => OpcodeSpec { name: "CALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Ret => OpcodeSpec { name: "RET", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, + OpCode::PushScope => OpcodeSpec { name: "PUSH_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::PopScope => OpcodeSpec { name: "POP_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + OpCode::Alloc => OpcodeSpec { name: "ALLOC", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateLoad => OpcodeSpec { name: "GATE_LOAD", imm_bytes: 4, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateStore => OpcodeSpec { name: "GATE_STORE", imm_bytes: 4, pops: 2, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginPeek => OpcodeSpec { name: "GATE_BEGIN_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndPeek => OpcodeSpec { name: "GATE_END_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginBorrow => OpcodeSpec { name: "GATE_BEGIN_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndBorrow => OpcodeSpec { name: "GATE_END_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateBeginMutate => OpcodeSpec { name: "GATE_BEGIN_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateEndMutate => OpcodeSpec { name: "GATE_END_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateRetain => OpcodeSpec { name: "GATE_RETAIN", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::GateRelease => OpcodeSpec { name: "GATE_RELEASE", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::Syscall => OpcodeSpec { name: "SYSCALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, + OpCode::FrameSync => OpcodeSpec { name: "FRAME_SYNC", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Infer the numeric range from the TryFrom mapping used in opcode.rs tests + // by scanning a plausible range (0..1024) and keeping all successful decodes. + #[test] + fn every_opcode_has_spec_and_imm_defined() { + let mut count = 0usize; + for val in 0u16..=1023u16 { + if let Ok(op) = OpCode::try_from(val) { + let spec = op.spec(); + // Access all fields to ensure they are present and not optimized away + let _ = ( + spec.name, + spec.imm_bytes, + spec.pops, + spec.pushes, + spec.is_branch, + spec.is_terminator, + spec.may_trap, + ); + // imm_bytes must be defined (0 is valid) + assert!(spec.imm_bytes >= 0, "imm_bytes must be defined for {op:?}"); + count += 1; + } + } + assert!(count > 0, "No opcodes were found via OpCode::try_from"); + } +} diff --git a/crates/prometeu-compiler/src/backend/emit_bytecode.rs b/crates/prometeu-compiler/src/backend/emit_bytecode.rs index 18ec47cd..0bce1ec0 100644 --- a/crates/prometeu-compiler/src/backend/emit_bytecode.rs +++ b/crates/prometeu-compiler/src/backend/emit_bytecode.rs @@ -69,12 +69,11 @@ pub fn emit_fragments(module: &ir_vm::Module) -> Result { let mut functions = Vec::new(); let mut function_names = Vec::new(); for (i, function) in module.functions.iter().enumerate() { - let (start_idx, end_idx) = function_ranges[i]; + let (start_idx, last_op_idx) = function_ranges[i]; let start_pc = pcs[start_idx]; - // Interpretamos `end_idx` como o índice da ÚLTIMA instrução pertencente à função (inclusivo). - // Portanto, o `end_pc` correto é o PC da próxima instrução (exclusivo). Se não houver próxima, - // usamos o tamanho total do bytecode. - let end_pc = if (end_idx + 1) < pcs.len() { pcs[end_idx + 1] } else { bytecode.len() as u32 }; + // `last_op_idx` aponta para o último Asm::Op pertencente à função. O PC de término canônico + // é o PC da próxima entrada em `pcs` (exclusivo). Labels subsequentes não alteram o PC. + let end_pc = if (last_op_idx + 1) < pcs.len() { pcs[last_op_idx + 1] } else { bytecode.len() as u32 }; // Nome enriquecido para tooling/analysis: "name@offset+len" let enriched_name = format!("{}@{}+{}", function.name, start_pc, end_pc - start_pc); @@ -179,6 +178,8 @@ impl BytecodeEmitter { let mut stack_height: i32 = 0; // Nome canônico para o label de término desta função let end_label = format!("{}::__end", function.name); + // Track last opcode index for this function (to exclude trailing padding/labels) + let mut last_op_idx_in_func: Option = None; for instr in &function.body { let op_start_idx = asm_instrs.len(); @@ -331,21 +332,22 @@ impl BytecodeEmitter { } let op_end_idx = asm_instrs.len(); + // If we just pushed an Op, record its index as last_op_idx_in_func + if op_end_idx > 0 { + if let Asm::Op(_, _) = &asm_instrs[op_end_idx - 1] { + last_op_idx_in_func = Some(op_end_idx - 1); + } + } for _ in op_start_idx..op_end_idx { ir_instr_map.push(Some(instr)); } } - // Para compatibilidade com geradores que efetuam saltos para o "fim da função", - // garantimos que exista ao menos um NOP antes do label final. Isso assegura que - // qualquer alvo que considere o label como posição exclusiva ou inclusiva não caia - // dentro do início da próxima função. - asm_instrs.push(Asm::Op(OpCode::Nop, vec![])); - ir_instr_map.push(None); // Emite label canônico de término no fim real do corpo asm_instrs.push(Asm::Label(end_label)); ir_instr_map.push(None); - let end_idx = asm_instrs.len(); - ranges.push((start_idx, end_idx)); + // Determine last op index; if function had no ops, fallback to the padding NOP we just injected + let last_op_idx = last_op_idx_in_func.unwrap_or(start_idx); + ranges.push((start_idx, last_op_idx)); } Ok(ranges) } diff --git a/crates/prometeu-compiler/src/building/linker.rs b/crates/prometeu-compiler/src/building/linker.rs index acb46c45..2232fd59 100644 --- a/crates/prometeu-compiler/src/building/linker.rs +++ b/crates/prometeu-compiler/src/building/linker.rs @@ -2,6 +2,7 @@ use crate::building::output::CompiledModule; use crate::building::plan::BuildStep; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::layout; +use prometeu_bytecode::opcode_spec::OpCodeSpecExt; use prometeu_bytecode::{ConstantPoolEntry, DebugInfo}; use std::collections::HashMap; use prometeu_abi::virtual_machine::{ProgramImage, Value}; @@ -219,51 +220,46 @@ impl Linker { }; pos += 2; + let imm_len = opcode.spec().imm_bytes as usize; match opcode { OpCode::PushConst => { - if pos + 4 <= end { + if pos + imm_len <= end && imm_len == 4 { let local_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()) as usize; if let Some(&global_idx) = local_to_global_const.get(local_idx) { combined_code[pos..pos+4].copy_from_slice(&global_idx.to_le_bytes()); } - pos += 4; } + pos += imm_len; } OpCode::Call => { - if pos + 4 <= end { + if pos + imm_len <= end && imm_len == 4 { let local_func_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()); - + // Check if this PC was already patched by an import. // If it wasn't, it's an internal call that needs relocation. // `import.relocation_pcs` holds the PC at the start of the CALL immediate (after opcode), // and here `pos` currently points exactly at that immediate. let reloc_pc = (pos - code_offset) as u32; let is_import = module.imports.iter().any(|imp| imp.relocation_pcs.contains(&reloc_pc)); - + if !is_import { let global_func_idx = module_function_offsets[i] + local_func_idx; combined_code[pos..pos+4].copy_from_slice(&global_func_idx.to_le_bytes()); } - pos += 4; } + pos += imm_len; } - // Do NOT relocate intra-function control flow. Branch immediates are - // function-relative by contract and must remain untouched by the linker. + // Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { - // Just skip the immediate - pos += 4; + if pos + imm_len <= end && imm_len == 4 { + patch_u32_at(&mut combined_code, pos, &|cur| cur + (code_offset as u32)); + } + pos += imm_len; } - OpCode::PushI32 | OpCode::PushBounded | OpCode::GetGlobal | OpCode::SetGlobal | OpCode::GetLocal | OpCode::SetLocal - | OpCode::PopN | OpCode::Syscall | OpCode::GateLoad | OpCode::GateStore => { - pos += 4; + _ => { + // Generic advance using canonical immediate length. + pos += imm_len; } - OpCode::PushI64 | OpCode::PushF64 | OpCode::Alloc => { - pos += 8; - } - OpCode::PushBool => { - pos += 1; - } - _ => {} } } } @@ -299,6 +295,9 @@ impl Linker { // "name@offset+len", alinhar apenas o `code_len` de `combined_functions[idx]` a esses // valores (os offsets do DebugInfo são locais ao módulo antes do link). Mantemos o // `code_offset` já realocado durante o PASS 1. + // Track which function metas received a precise code_len from DebugInfo + let mut has_precise_len: Vec = vec![false; combined_functions.len()]; + for (idx, name) in &combined_function_names { if let Some((base, rest)) = name.split_once('@') { let mut parts = rest.split('+'); @@ -308,6 +307,7 @@ impl Linker { let old_off = meta.code_offset; let old_len = meta.code_len; meta.code_len = len; + has_precise_len[*idx as usize] = true; eprintln!( "[Linker][debug] Align len idx={} name={} -> code_offset {} (kept) | code_len {} -> {}", idx, base, old_off, old_len, len @@ -318,11 +318,30 @@ impl Linker { } } - // Recalcular code_len de todas as funções no código combinado com base no deslocamento da próxima função - // (end exclusivo). Isso garante que o fim efetivo da função seja exatamente o início da próxima - // no buffer combinado, evitando divergências em saltos para o fim da função. - // Use rotina canônica compartilhada para recalcular os comprimentos das funções - layout::recompute_function_lengths_in_place(&mut combined_functions, combined_code.len()); + // Ensure DebugInfo also contains plain base names alongside enriched names for easy lookup. + // For any entry of form "name@off+len", also add (idx, "name") if missing. + let mut plain_names_to_add: Vec<(u32, String)> = Vec::new(); + for (idx, name) in &combined_function_names { + if let Some((base, _)) = name.split_once('@') { + let already_has_plain = combined_function_names.iter().any(|(i, n)| i == idx && n == base); + if !already_has_plain { + plain_names_to_add.push((*idx, base.to_string())); + } + } + } + combined_function_names.extend(plain_names_to_add); + + // Recompute code_len ONLY for functions that did NOT receive a precise length from DebugInfo. + // This preserves exact ends emitted by the compiler while still filling lengths for functions + // that lack enriched annotations. + let total_len = combined_code.len(); + for i in 0..combined_functions.len() { + if !has_precise_len.get(i).copied().unwrap_or(false) { + let start = combined_functions[i].code_offset as usize; + let end = layout::function_end_from_next(&combined_functions, i, total_len); + combined_functions[i].code_len = end.saturating_sub(start) as u32; + } + } // Removido padding específico de `frame`; o emissor passou a garantir que o label de término // esteja no ponto exato do fim do corpo, e, quando necessário, insere NOPs reais antes do fim. @@ -343,6 +362,12 @@ impl Linker { let combined_debug_info = if combined_pc_to_span.is_empty() && combined_function_names.is_empty() { None } else { + // Ensure entry-point name mapping is present for easy lookup in DebugInfo + if let Some(frame_idx) = final_exports.get("frame") { + if !combined_function_names.iter().any(|(i, n)| i == frame_idx && n == "frame") { + combined_function_names.push((*frame_idx, "frame".to_string())); + } + } Some(DebugInfo { pc_to_span: combined_pc_to_span, function_names: combined_function_names, diff --git a/crates/prometeu-compiler/src/building/orchestrator.rs b/crates/prometeu-compiler/src/building/orchestrator.rs index 7853dd88..797f49c6 100644 --- a/crates/prometeu-compiler/src/building/orchestrator.rs +++ b/crates/prometeu-compiler/src/building/orchestrator.rs @@ -248,6 +248,7 @@ mod tests { #[test] fn test_framesync_injected_end_to_end() { use prometeu_bytecode::opcode::OpCode; + use prometeu_bytecode::opcode_spec::OpCodeSpecExt; let dir = tempdir().unwrap(); let project_dir = dir.path().to_path_buf(); fs::create_dir_all(project_dir.join("src/main/modules")).unwrap(); @@ -282,25 +283,9 @@ mod tests { // Decode sequentially: each instruction is a u16 opcode (LE) followed by operands. // We'll walk forward and record the sequence of opcodes, ignoring operands based on known sizes. fn operand_size(op: u16) -> usize { - match op { - x if x == OpCode::PushConst as u16 => 4, - x if x == OpCode::PushI64 as u16 => 8, - x if x == OpCode::PushF64 as u16 => 8, - x if x == OpCode::PushBool as u16 => 1, - x if x == OpCode::PushI32 as u16 => 4, - x if x == OpCode::PushBounded as u16 => 4, - x if x == OpCode::Jmp as u16 => 4, - x if x == OpCode::JmpIfFalse as u16 => 4, - x if x == OpCode::JmpIfTrue as u16 => 4, - x if x == OpCode::GetLocal as u16 => 4, - x if x == OpCode::SetLocal as u16 => 4, - x if x == OpCode::GetGlobal as u16 => 4, - x if x == OpCode::SetGlobal as u16 => 4, - x if x == OpCode::Alloc as u16 => 8, // type_id (u32) + slots (u32) - x if x == OpCode::Syscall as u16 => 4, - x if x == OpCode::GateLoad as u16 => 4, - x if x == OpCode::GateStore as u16 => 4, - _ => 0, + match OpCode::try_from(op) { + Ok(opc) => opc.spec().imm_bytes as usize, + Err(_) => 0, } } diff --git a/crates/prometeu-vm/src/bytecode/decoder.rs b/crates/prometeu-vm/src/bytecode/decoder.rs index d94af7da..4532c226 100644 --- a/crates/prometeu-vm/src/bytecode/decoder.rs +++ b/crates/prometeu-vm/src/bytecode/decoder.rs @@ -1,5 +1,5 @@ -use crate::opcode_spec::{OpCodeSpecExt, OpcodeSpec}; use prometeu_bytecode::opcode::OpCode; +use prometeu_bytecode::opcode_spec::{OpCodeSpecExt, OpcodeSpec}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum DecodeError { diff --git a/crates/prometeu-vm/src/opcode_spec.rs b/crates/prometeu-vm/src/opcode_spec.rs index bb81c4d2..a15ca29b 100644 --- a/crates/prometeu-vm/src/opcode_spec.rs +++ b/crates/prometeu-vm/src/opcode_spec.rs @@ -1,84 +1,3 @@ -use prometeu_bytecode::opcode::OpCode; - -/// Specification for a single OpCode. -/// All JMP/JMP_IF_* immediate are u32 absolute offsets from function start. -#[derive(Debug, Clone, Copy)] -pub struct OpcodeSpec { - pub name: &'static str, - pub imm_bytes: u8, // immediate payload size (decode) - pub pops: u16, // slots popped - pub pushes: u16, // slots pushed - pub is_branch: bool, // has a control-flow target - pub is_terminator: bool, // ends basic block: JMP/RET/TRAP/HALT - pub may_trap: bool, // runtime trap possible -} - -pub trait OpCodeSpecExt { - fn spec(&self) -> OpcodeSpec; -} - -impl OpCodeSpecExt for OpCode { - fn spec(&self) -> OpcodeSpec { - match self { - OpCode::Nop => OpcodeSpec { name: "NOP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Halt => OpcodeSpec { name: "HALT", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, - OpCode::Jmp => OpcodeSpec { name: "JMP", imm_bytes: 4, pops: 0, pushes: 0, is_branch: true, is_terminator: true, may_trap: false }, - OpCode::JmpIfFalse => OpcodeSpec { name: "JMP_IF_FALSE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: true }, - OpCode::JmpIfTrue => OpcodeSpec { name: "JMP_IF_TRUE", imm_bytes: 4, pops: 1, pushes: 0, is_branch: true, is_terminator: false, may_trap: true }, - OpCode::Trap => OpcodeSpec { name: "TRAP", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: true }, - OpCode::PushConst => OpcodeSpec { name: "PUSH_CONST", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Pop => OpcodeSpec { name: "POP", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PopN => OpcodeSpec { name: "POP_N", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Dup => OpcodeSpec { name: "DUP", imm_bytes: 0, pops: 1, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Swap => OpcodeSpec { name: "SWAP", imm_bytes: 0, pops: 2, pushes: 2, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PushI64 => OpcodeSpec { name: "PUSH_I64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PushF64 => OpcodeSpec { name: "PUSH_F64", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PushBool => OpcodeSpec { name: "PUSH_BOOL", imm_bytes: 1, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PushI32 => OpcodeSpec { name: "PUSH_I32", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PushBounded => OpcodeSpec { name: "PUSH_BOUNDED", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Add => OpcodeSpec { name: "ADD", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Sub => OpcodeSpec { name: "SUB", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Mul => OpcodeSpec { name: "MUL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Div => OpcodeSpec { name: "DIV", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Mod => OpcodeSpec { name: "MOD", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::BoundToInt => OpcodeSpec { name: "BOUND_TO_INT", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::IntToBoundChecked => OpcodeSpec { name: "INT_TO_BOUND_CHECKED", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Eq => OpcodeSpec { name: "EQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Neq => OpcodeSpec { name: "NEQ", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Lt => OpcodeSpec { name: "LT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Gt => OpcodeSpec { name: "GT", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::And => OpcodeSpec { name: "AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Or => OpcodeSpec { name: "OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Not => OpcodeSpec { name: "NOT", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::BitAnd => OpcodeSpec { name: "BIT_AND", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::BitOr => OpcodeSpec { name: "BIT_OR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::BitXor => OpcodeSpec { name: "BIT_XOR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Shl => OpcodeSpec { name: "SHL", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Shr => OpcodeSpec { name: "SHR", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Lte => OpcodeSpec { name: "LTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Gte => OpcodeSpec { name: "GTE", imm_bytes: 0, pops: 2, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Neg => OpcodeSpec { name: "NEG", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::GetGlobal => OpcodeSpec { name: "GET_GLOBAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::SetGlobal => OpcodeSpec { name: "SET_GLOBAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::GetLocal => OpcodeSpec { name: "GET_LOCAL", imm_bytes: 4, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::SetLocal => OpcodeSpec { name: "SET_LOCAL", imm_bytes: 4, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Call => OpcodeSpec { name: "CALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Ret => OpcodeSpec { name: "RET", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: true, may_trap: false }, - OpCode::PushScope => OpcodeSpec { name: "PUSH_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::PopScope => OpcodeSpec { name: "POP_SCOPE", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - OpCode::Alloc => OpcodeSpec { name: "ALLOC", imm_bytes: 8, pops: 0, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateLoad => OpcodeSpec { name: "GATE_LOAD", imm_bytes: 4, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateStore => OpcodeSpec { name: "GATE_STORE", imm_bytes: 4, pops: 2, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateBeginPeek => OpcodeSpec { name: "GATE_BEGIN_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateEndPeek => OpcodeSpec { name: "GATE_END_PEEK", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateBeginBorrow => OpcodeSpec { name: "GATE_BEGIN_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateEndBorrow => OpcodeSpec { name: "GATE_END_BORROW", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateBeginMutate => OpcodeSpec { name: "GATE_BEGIN_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateEndMutate => OpcodeSpec { name: "GATE_END_MUTATE", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateRetain => OpcodeSpec { name: "GATE_RETAIN", imm_bytes: 0, pops: 1, pushes: 1, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::GateRelease => OpcodeSpec { name: "GATE_RELEASE", imm_bytes: 0, pops: 1, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::Syscall => OpcodeSpec { name: "SYSCALL", imm_bytes: 4, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: true }, - OpCode::FrameSync => OpcodeSpec { name: "FRAME_SYNC", imm_bytes: 0, pops: 0, pushes: 0, is_branch: false, is_terminator: false, may_trap: false }, - } - } -} +// Canonical `OpcodeSpec` now lives in `prometeu-bytecode`. +// Keep this module as a thin re-export for compatibility. +pub use prometeu_bytecode::opcode_spec::*; diff --git a/test-cartridges/canonical/golden/program.pbc b/test-cartridges/canonical/golden/program.pbc index a079f21730442c9453f42ab4da526f8fcc63a904..5a70490d7e7f3a43ff64e865774031b80c27af1b 100644 GIT binary patch delta 120 zcmX@a@|9(R4`&1;0|N^K1B2Vf(7BA}AQlMl194hWVs5H~fwq|mm;kC{5CP(_)MO*Q xoYXuAGZSrNBM3S98>6ba7*MjfC|N%@F*8p;H$SB`C$-oLYzR<-ffb0s901;r8x{Zn delta 43 wcmey$a)@Pu52q?40|N^K1B39!(7B8(OhDFTO(s=NRv;s-C^0v6vKNya0Kpdsg#Z8m diff --git a/test-cartridges/canonical/prometeu.json b/test-cartridges/canonical/prometeu.json index 46ec0c52..1e54e60e 100644 --- a/test-cartridges/canonical/prometeu.json +++ b/test-cartridges/canonical/prometeu.json @@ -1,5 +1,6 @@ { "name": "canonical", "version": "0.1.0", - "script_fe": "pbs" + "script_fe": "pbs", + "entry": "src/main/modules/main.pbs" }