This commit is contained in:
bQUARKz 2026-02-10 00:32:43 +00:00
parent cf94ecda84
commit 6eda2d21ac
Signed by: bquarkz
SSH Key Fingerprint: SHA256:Z7dgqoglWwoK6j6u4QC87OveEq74WOhFN+gitsxtkf8
13 changed files with 93 additions and 97 deletions

View File

@ -62,10 +62,7 @@ pub struct TrapInfo {
/// Checks if an instruction is a jump (branch) instruction. /// Checks if an instruction is a jump (branch) instruction.
pub fn is_jump(opcode: OpCode) -> bool { pub fn is_jump(opcode: OpCode) -> bool {
match opcode { opcode.spec().is_branch
OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => true,
_ => false,
}
} }
/// Checks if an instruction has any immediate operands in the instruction stream. /// Checks if an instruction has any immediate operands in the instruction stream.

View File

@ -0,0 +1,11 @@
/// Reads a little-endian `u32` from `buf`, starting at byte offset `pos`.
///
/// Returns `None` when the four bytes beginning at `pos` are not entirely
/// inside `buf`, including the case where `pos + 4` does not fit in `usize`.
pub fn read_u32_le(buf: &[u8], pos: usize) -> Option<u32> {
    // `checked_add` keeps an absurdly large `pos` on the `None` path instead of
    // panicking on arithmetic overflow in builds with overflow checks enabled.
    let end = pos.checked_add(4)?;
    let b = buf.get(pos..end)?;
    Some(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
}
/// Writes `v` as a little-endian `u32` into `buf`, starting at byte offset `pos`.
///
/// Returns `Some(())` after the four bytes have been overwritten. Returns
/// `None`, leaving `buf` untouched, when the four bytes beginning at `pos` are
/// not entirely inside `buf`, including the case where `pos + 4` does not fit
/// in `usize`.
pub fn write_u32_le(buf: &mut [u8], pos: usize, v: u32) -> Option<()> {
    // `checked_add` keeps an absurdly large `pos` on the `None` path instead of
    // panicking on arithmetic overflow in builds with overflow checks enabled.
    let end = pos.checked_add(4)?;
    buf.get_mut(pos..end)?.copy_from_slice(&v.to_le_bytes());
    Some(())
}

View File

@ -53,12 +53,3 @@ pub fn function_index_by_pc(functions: &[FunctionMeta], code_len_total: usize, p
} }
None None
} }
/// Clamps an absolute jump target to the end (exclusive) of the enclosing
/// function identified by `func_idx`.
///
/// The end is taken from `function_end_from_next(functions, func_idx,
/// code_len_total)`. Targets at or below that end are returned unchanged;
/// anything beyond it is replaced by the end itself. No lower bound is applied.
#[inline]
pub fn clamp_jump_target(functions: &[FunctionMeta], code_len_total: usize, func_idx: usize, target_abs: u32) -> u32 {
    let end = function_end_from_next(functions, func_idx, code_len_total);
    if (target_abs as usize) > end {
        // `end` is strictly smaller than a `u32` value on this branch, so the
        // narrowing cast cannot truncate.
        end as u32
    } else {
        target_abs
    }
}

View File

@ -23,5 +23,6 @@ pub mod layout;
pub mod decoder; pub mod decoder;
mod model; mod model;
pub mod io;
pub use model::*; pub use model::*;

View File

@ -2,14 +2,16 @@ use crate::building::output::CompiledModule;
use crate::building::plan::BuildStep; use crate::building::plan::BuildStep;
use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::layout; use prometeu_bytecode::layout;
use prometeu_bytecode::opcode_spec::OpCodeSpecExt; use prometeu_bytecode::decoder::decode_next;
use prometeu_bytecode::{ConstantPoolEntry, DebugInfo}; use prometeu_bytecode::{ConstantPoolEntry, DebugInfo};
use std::collections::HashMap; use std::collections::HashMap;
use prometeu_abi::virtual_machine::{ProgramImage, Value}; use prometeu_abi::virtual_machine::{ProgramImage, Value};
use prometeu_analysis::ids::ProjectId; use prometeu_analysis::ids::ProjectId;
use prometeu_bytecode::readwrite::{read_u32_le, write_u32_le};
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub enum LinkError { pub enum LinkError {
OutOfBounds(usize, usize),
UnresolvedSymbol(String), UnresolvedSymbol(String),
DuplicateExport(String), DuplicateExport(String),
IncompatibleSymbolSignature(String), IncompatibleSymbolSignature(String),
@ -18,6 +20,7 @@ pub enum LinkError {
impl std::fmt::Display for LinkError { impl std::fmt::Display for LinkError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
LinkError::OutOfBounds(pos, len) => write!(f, "Out of bounds: pos={} len={}", pos, len),
LinkError::UnresolvedSymbol(s) => write!(f, "Unresolved symbol: {}", s), LinkError::UnresolvedSymbol(s) => write!(f, "Unresolved symbol: {}", s),
LinkError::DuplicateExport(s) => write!(f, "Duplicate export: {}", s), LinkError::DuplicateExport(s) => write!(f, "Duplicate export: {}", s),
LinkError::IncompatibleSymbolSignature(s) => write!(f, "Incompatible symbol signature: {}", s), LinkError::IncompatibleSymbolSignature(s) => write!(f, "Incompatible symbol signature: {}", s),
@ -64,6 +67,7 @@ impl ConstantPoolBitKey {
} }
impl Linker { impl Linker {
pub fn link(modules: Vec<CompiledModule>, steps: Vec<BuildStep>) -> Result<ProgramImage, LinkError> { pub fn link(modules: Vec<CompiledModule>, steps: Vec<BuildStep>) -> Result<ProgramImage, LinkError> {
if modules.len() != steps.len() { if modules.len() != steps.len() {
return Err(LinkError::IncompatibleSymbolSignature(format!("Module count ({}) does not match build steps count ({})", modules.len(), steps.len()))); return Err(LinkError::IncompatibleSymbolSignature(format!("Module count ({}) does not match build steps count ({})", modules.len(), steps.len())));
@ -195,72 +199,75 @@ impl Linker {
} }
} }
// Internal call relocation (from module-local func_idx to global func_idx) let mut pc = code_offset;
// And PUSH_CONST relocation.
// Also relocate intra-module jump target addresses when modules are concatenated.
// Small helper to patch a 32-bit immediate at `pos` using a transformer function.
// Safety: caller must ensure `pos + 4 <= end`.
let mut patch_u32_at = |buf: &mut Vec<u8>, pos: usize, f: &dyn Fn(u32) -> u32| {
let current = u32::from_le_bytes(buf[pos..pos+4].try_into().unwrap());
let next = f(current);
buf[pos..pos+4].copy_from_slice(&next.to_le_bytes());
};
let mut pos = code_offset;
let end = code_offset + module.code.len(); let end = code_offset + module.code.len();
while pos < end { while pc < end {
if pos + 2 > end { break; } // Scope the immutable borrow from decode_next so we can mutate combined_code afterwards
let op_val = u16::from_le_bytes([combined_code[pos], combined_code[pos+1]]); let (opcode, next_pc, imm_start, imm_u32_opt) = {
let opcode = match OpCode::try_from(op_val) { match decode_next(pc, &combined_code) {
Ok(op) => op, Ok(instr) => {
Err(_) => { let opcode = instr.opcode;
pos += 2; let next_pc = instr.next_pc;
continue; let imm_start = instr.pc + 2; // start of immediate payload
let imm_u32_opt = match opcode {
OpCode::PushConst | OpCode::Call | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => {
match instr.imm_u32() {
Ok(v) => Some(v),
Err(_) => None,
}
}
_ => None,
};
(opcode, next_pc, imm_start, imm_u32_opt)
}
Err(e) => {
return Err(LinkError::IncompatibleSymbolSignature(format!(
"Bytecode decode error at pc {}: {:?}",
pc - code_offset, e
)));
}
} }
}; };
pos += 2;
let imm_len = opcode.spec().imm_bytes as usize;
match opcode { match opcode {
OpCode::PushConst => { OpCode::PushConst => {
if pos + imm_len <= end && imm_len == 4 { let local_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
let local_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()) as usize; "Invalid PUSH_CONST immediate at pc {}",
if let Some(&global_idx) = local_to_global_const.get(local_idx) { pc - code_offset
combined_code[pos..pos+4].copy_from_slice(&global_idx.to_le_bytes()); )))? as usize;
} if let Some(&global_idx) = local_to_global_const.get(local_idx) {
patch_u32_at(&mut combined_code, imm_start, &|_| global_idx);
} }
pos += imm_len;
} }
OpCode::Call => { OpCode::Call => {
if pos + imm_len <= end && imm_len == 4 { let local_func_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
let local_func_idx = u32::from_le_bytes(combined_code[pos..pos+4].try_into().unwrap()); "Invalid CALL immediate at pc {}",
pc - code_offset
// Check if this PC was already patched by an import. )))?;
// If it wasn't, it's an internal call that needs relocation. // Determine if this CALL site corresponds to an import relocation.
// `import.relocation_pcs` holds the PC at the start of the CALL immediate (after opcode), let reloc_rel_pc = (imm_start - code_offset) as u32;
// and here `pos` currently points exactly at that immediate. let is_import = module
let reloc_pc = (pos - code_offset) as u32; .imports
let is_import = module.imports.iter().any(|imp| imp.relocation_pcs.contains(&reloc_pc)); .iter()
.any(|imp| imp.relocation_pcs.contains(&reloc_rel_pc));
if !is_import { if !is_import {
let global_func_idx = module_function_offsets[i] + local_func_idx; let global_func_idx = module_function_offsets[i] + local_func_idx;
combined_code[pos..pos+4].copy_from_slice(&global_func_idx.to_le_bytes()); patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx);
}
} }
pos += imm_len;
} }
// Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs // Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs
OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => {
if pos + imm_len <= end && imm_len == 4 { // For branches, immediate must be present and represents rel PC from function start
patch_u32_at(&mut combined_code, pos, &|cur| cur + (code_offset as u32)); let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
} "Invalid branch immediate at pc {}",
pos += imm_len; pc - code_offset
} )))?;
_ => { patch_u32_at(&mut combined_code, imm_start, &|cur| cur + (code_offset as u32));
// Generic advance using canonical immediate length.
pos += imm_len;
} }
_ => {}
} }
pc = next_pc;
} }
} }
@ -384,6 +391,17 @@ impl Linker {
} }
} }
/// Rewrites the little-endian `u32` stored at `pos` in `buf` in place.
///
/// The current value is read, passed through `f`, and the result is written
/// back to the same offset. If either the read or the write helper reports
/// that the four bytes are unavailable, `LinkError::OutOfBounds(pos, buf.len())`
/// is returned.
fn patch_u32_at(
    buf: &mut [u8],
    pos: usize,
    f: impl FnOnce(u32) -> u32,
) -> Result<(), LinkError> {
    let len = buf.len();
    let out_of_bounds = || LinkError::OutOfBounds(pos, len);

    let old = match prometeu_bytecode::io::read_u32_le(buf, pos) {
        Some(value) => value,
        None => return Err(out_of_bounds()),
    };

    match prometeu_bytecode::io::write_u32_le(buf, pos, f(old)) {
        Some(()) => Ok(()),
        None => Err(out_of_bounds()),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@ -248,7 +248,6 @@ mod tests {
#[test] #[test]
fn test_framesync_injected_end_to_end() { fn test_framesync_injected_end_to_end() {
use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::opcode_spec::OpCodeSpecExt;
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let project_dir = dir.path().to_path_buf(); let project_dir = dir.path().to_path_buf();
fs::create_dir_all(project_dir.join("src/main/modules")).unwrap(); fs::create_dir_all(project_dir.join("src/main/modules")).unwrap();
@ -280,22 +279,15 @@ mod tests {
let end = (meta.code_offset + meta.code_len) as usize; let end = (meta.code_offset + meta.code_len) as usize;
let code = &res.image.rom[start..end]; let code = &res.image.rom[start..end];
// Decode sequentially: each instruction is a u16 opcode (LE) followed by operands. // Decode sequentially using the canonical decoder; record opcode stream.
// We'll walk forward and record the sequence of opcodes, ignoring operands based on known sizes.
fn operand_size(op: u16) -> usize {
match OpCode::try_from(op) {
Ok(opc) => opc.spec().imm_bytes as usize,
Err(_) => 0,
}
}
let mut pcs = Vec::new(); let mut pcs = Vec::new();
let mut i = 0usize; let mut i = 0usize;
while i + 1 < code.len() { while i < code.len() {
let op = u16::from_le_bytes([code[i], code[i + 1]]); let instr = prometeu_bytecode::decoder::decode_next(i, code).expect("decoder should succeed");
pcs.push(op); pcs.push(instr.opcode as u16);
i += 2 + operand_size(op); i = instr.next_pc;
} }
assert_eq!(i, code.len(), "decoder must end exactly at function end");
assert!(pcs.len() >= 2); assert!(pcs.len() >= 2);
let last = *pcs.last().unwrap(); let last = *pcs.last().unwrap();

View File

@ -1,8 +0,0 @@
// Re-export canonical decoder from prometeu-bytecode to eliminate bespoke implementation in VM.
pub use prometeu_bytecode::decoder::{decode_next, DecodeError, DecodedInstr};
/// Backwards-compatible shim for legacy call sites; delegates to canonical decoder.
///
/// Takes its arguments as `(rom, pc)` and forwards them to `decode_next` in
/// that function's `(pc, rom)` order, returning its result unchanged.
#[inline]
pub fn decode_at(rom: &[u8], pc: usize) -> Result<DecodedInstr<'_>, DecodeError> {
decode_next(pc, rom)
}

View File

@ -1 +0,0 @@
pub mod decoder;

View File

@ -2,8 +2,6 @@ mod virtual_machine;
mod call_frame; mod call_frame;
mod scope_frame; mod scope_frame;
pub mod local_addressing; pub mod local_addressing;
pub mod opcode_spec;
pub mod bytecode;
pub mod verifier; pub mod verifier;
pub use prometeu_abi::virtual_machine::program::ProgramImage; pub use prometeu_abi::virtual_machine::program::ProgramImage;

View File

@ -1,3 +0,0 @@
// Canonical `OpcodeSpec` now lives in `prometeu-bytecode`.
// Keep this module as a thin re-export for compatibility.
pub use prometeu_bytecode::opcode_spec::*;

View File

@ -1,5 +1,5 @@
use prometeu_abi::syscalls::Syscall; use prometeu_abi::syscalls::Syscall;
use crate::bytecode::decoder::{decode_at, DecodeError}; use prometeu_bytecode::decoder::{decode_next, DecodeError};
use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::opcode_spec::OpCodeSpecExt; use prometeu_bytecode::opcode_spec::OpCodeSpecExt;
use prometeu_bytecode::FunctionMeta; use prometeu_bytecode::FunctionMeta;
@ -62,7 +62,7 @@ impl Verifier {
let mut pc = 0; let mut pc = 0;
while pc < func_code.len() { while pc < func_code.len() {
valid_pc.insert(pc); valid_pc.insert(pc);
let instr = decode_at(func_code, pc).map_err(|e| match e { let instr = decode_next(pc, func_code).map_err(|e| match e {
DecodeError::UnknownOpcode { pc: _, opcode } => DecodeError::UnknownOpcode { pc: _, opcode } =>
VerifierError::UnknownOpcode { pc: func_start + pc, opcode }, VerifierError::UnknownOpcode { pc: func_start + pc, opcode },
DecodeError::TruncatedOpcode { pc: _ } => DecodeError::TruncatedOpcode { pc: _ } =>
@ -89,7 +89,7 @@ impl Verifier {
while let Some(pc) = worklist.pop_front() { while let Some(pc) = worklist.pop_front() {
let in_height = *stack_height_in.get(&pc).unwrap(); let in_height = *stack_height_in.get(&pc).unwrap();
let instr = decode_at(func_code, pc).unwrap(); // Guaranteed to succeed due to first pass let instr = decode_next(pc, func_code).unwrap(); // Guaranteed to succeed due to first pass
let spec = instr.opcode.spec(); let spec = instr.opcode.spec();
// Resolve dynamic pops/pushes // Resolve dynamic pops/pushes

View File

@ -366,7 +366,7 @@ impl VirtualMachine {
let start_pc = self.pc; let start_pc = self.pc;
// Fetch & Decode // Fetch & Decode
let instr = crate::bytecode::decoder::decode_at(&self.program.rom, self.pc) let instr = prometeu_bytecode::decoder::decode_next(self.pc, &self.program.rom)
.map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?; .map_err(|e| LogicalFrameEndingReason::Panic(format!("{:?}", e)))?;
let opcode = instr.opcode; let opcode = instr.opcode;