2026-03-24 13:40:31 +00:00

644 lines
28 KiB
Rust

use crate::building::output::CompiledModule;
use crate::building::plan::BuildStep;
use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::layout;
use prometeu_bytecode::decoder::decode_next;
use prometeu_bytecode::{ConstantPoolEntry, DebugInfo};
use std::collections::HashMap;
use prometeu_abi::virtual_machine::{ProgramImage, Value};
use prometeu_analysis::ids::ProjectId;
use prometeu_bytecode::readwrite::{read_u32_le, write_u32_le};
/// Errors reported while linking compiled modules into a program image.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum LinkError {
    /// A read or write at position `.0` did not fit in a buffer of length `.1`.
    OutOfBounds(usize, usize),
    /// An import (or a dependency alias) could not be resolved; the payload is the message.
    UnresolvedSymbol(String),
    /// The same exported symbol was defined more than once; the payload is the message.
    DuplicateExport(String),
    /// Inconsistent linker input; the payload is the message. `Linker::link` also uses this
    /// variant for a module/step count mismatch and for bytecode decode failures.
    IncompatibleSymbolSignature(String),
}

impl std::fmt::Display for LinkError {
    /// Renders a short description prefix followed by the variant's payload.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::OutOfBounds(pos, len) => {
                out.write_fmt(format_args!("Out of bounds: pos={} len={}", pos, len))
            }
            Self::UnresolvedSymbol(message) => {
                out.write_fmt(format_args!("Unresolved symbol: {}", message))
            }
            Self::DuplicateExport(message) => {
                out.write_fmt(format_args!("Duplicate export: {}", message))
            }
            Self::IncompatibleSymbolSignature(message) => {
                out.write_fmt(format_args!("Incompatible symbol signature: {}", message))
            }
        }
    }
}

impl std::error::Error for LinkError {}
/// Stateless namespace for the linking step; all functionality is exposed through the
/// associated function `Linker::link`.
pub struct Linker;
/// Hashable, byte-exact identity of a constant-pool entry.
///
/// The key is a one-byte type tag followed by the entry's payload bytes, so entries of
/// different types never compare equal even when their payload bytes coincide.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ConstantPoolBitKey(Vec<u8>);

impl ConstantPoolBitKey {
    /// Builds the key for `entry`.
    ///
    /// Integers contribute their little-endian bytes, floats the little-endian bytes of their
    /// raw bit pattern (so two floats share a key only when their bits are identical), booleans
    /// a single `0`/`1` byte, strings their UTF-8 bytes, and `Null` no payload at all.
    fn from_entry(entry: &ConstantPoolEntry) -> Self {
        let (tag, payload): (u8, Vec<u8>) = match entry {
            ConstantPoolEntry::Null => (0, Vec::new()),
            ConstantPoolEntry::Int64(value) => (1, value.to_le_bytes().to_vec()),
            ConstantPoolEntry::Float64(value) => (2, value.to_bits().to_le_bytes().to_vec()),
            ConstantPoolEntry::Boolean(value) => (3, vec![if *value { 1 } else { 0 }]),
            ConstantPoolEntry::String(value) => (4, value.as_bytes().to_vec()),
            ConstantPoolEntry::Int32(value) => (5, value.to_le_bytes().to_vec()),
        };

        let mut bytes = Vec::with_capacity(1 + payload.len());
        bytes.push(tag);
        bytes.extend_from_slice(&payload);
        Self(bytes)
    }
}
impl Linker {
pub fn link(modules: Vec<CompiledModule>, steps: Vec<BuildStep>) -> Result<ProgramImage, LinkError> {
if modules.len() != steps.len() {
return Err(LinkError::IncompatibleSymbolSignature(format!("Module count ({}) does not match build steps count ({})", modules.len(), steps.len())));
}
let mut combined_code = Vec::new();
let mut combined_functions = Vec::new();
let mut combined_constants = Vec::new();
let mut constant_map: HashMap<ConstantPoolBitKey, u32> = HashMap::new();
// Debug info merging
let mut combined_pc_to_span = Vec::new();
let mut combined_function_names = Vec::new();
// 1. DebugSymbol resolution map: (ProjectKey, module_path, symbol_name) -> func_idx in combined_functions
let mut global_symbols = HashMap::new();
let mut module_code_offsets = Vec::with_capacity(modules.len());
let mut module_function_offsets = Vec::with_capacity(modules.len());
// Map ProjectKey to index
let _project_to_idx: HashMap<_, _> = modules.iter().enumerate().map(|(i, m)| (m.project_id.clone(), i)).collect();
// PASS 1: Collect exports and calculate offsets
for (_i, module) in modules.iter().enumerate() {
let code_offset = combined_code.len() as u32;
let function_offset = combined_functions.len() as u32;
module_code_offsets.push(code_offset);
module_function_offsets.push(function_offset);
for (key, meta) in &module.exports {
if let Some(local_func_idx) = meta.func_idx {
let global_func_idx = function_offset + local_func_idx;
// Note: Use a tuple as key for clarity
let symbol_id = (module.project_id.clone(), key.module_path.clone(), key.symbol_name.clone());
if global_symbols.contains_key(&symbol_id) {
return Err(LinkError::DuplicateExport(format!("Project {:?} export {}:{} already defined", symbol_id.0, symbol_id.1, symbol_id.2)));
}
global_symbols.insert(symbol_id, global_func_idx);
}
}
combined_code.extend_from_slice(&module.code);
for func in &module.function_metas {
let mut relocated = func.clone();
relocated.code_offset += code_offset;
combined_functions.push(relocated);
}
if let Some(debug) = &module.debug_info {
for (pc, span) in &debug.pc_to_span {
combined_pc_to_span.push((code_offset + pc, span.clone()));
}
for (func_idx, name) in &debug.function_names {
combined_function_names.push((function_offset + func_idx, name.clone()));
}
}
}
// PASS 2: Relocate constants and patch CALLs
for (i, module) in modules.iter().enumerate() {
let step = &steps[i];
let code_offset = module_code_offsets[i] as usize;
// Map local constant indices to global constant indices
let mut local_to_global_const = Vec::with_capacity(module.const_pool.len());
for entry in &module.const_pool {
let bit_key = ConstantPoolBitKey::from_entry(entry);
if let Some(&global_idx) = constant_map.get(&bit_key) {
local_to_global_const.push(global_idx);
} else {
let global_idx = combined_constants.len() as u32;
combined_constants.push(match entry {
ConstantPoolEntry::Null => Value::Null,
ConstantPoolEntry::Int64(v) => Value::Int64(*v),
ConstantPoolEntry::Float64(v) => Value::Float(*v),
ConstantPoolEntry::Boolean(v) => Value::Boolean(*v),
ConstantPoolEntry::String(v) => Value::String(v.clone()),
ConstantPoolEntry::Int32(v) => Value::Int32(*v),
});
constant_map.insert(bit_key, global_idx);
local_to_global_const.push(global_idx);
}
}
// Patch imports
for import in &module.imports {
// Resolve the dependency project id. If alias is missing/self, try all deps as fallback.
let mut candidate_projects: Vec<&ProjectId> = Vec::new();
if import.key.dep_alias == "self" || import.key.dep_alias.is_empty() {
candidate_projects.push(&module.project_id);
for (_alias, pid) in &step.deps { candidate_projects.push(pid); }
} else {
let pid = step.deps.get(&import.key.dep_alias)
.ok_or_else(|| LinkError::UnresolvedSymbol(format!("Dependency alias '{}' not found in project {:?}", import.key.dep_alias, module.project_id)))?;
candidate_projects.push(pid);
}
let mut resolved_idx: Option<u32> = None;
for pid in candidate_projects {
let pid_val: ProjectId = (*pid).clone();
let key = (pid_val, import.key.module_path.clone(), import.key.symbol_name.clone());
if let Some(&idx) = global_symbols.get(&key) {
resolved_idx = Some(idx);
break;
}
}
let target_func_idx = resolved_idx.ok_or_else(|| {
LinkError::UnresolvedSymbol(format!(
"DebugSymbol '{}:{}' not found in any candidate project (self={:?}, deps={:?})",
import.key.module_path,
import.key.symbol_name,
module.project_id,
step.deps
))
})?;
for &reloc_pc in &import.relocation_pcs {
// `reloc_pc` aponta para o INÍCIO do operando (após os 2 bytes do opcode),
// conforme `assemble_with_unresolved` grava `pc` antes de escrever o U32.
// Portanto, devemos escrever exatamente em `absolute_pc`.
let absolute_pc = code_offset + reloc_pc as usize;
if absolute_pc + 4 <= combined_code.len() {
combined_code[absolute_pc..absolute_pc+4]
.copy_from_slice(&target_func_idx.to_le_bytes());
}
}
}
let mut pc = code_offset;
let end = code_offset + module.code.len();
while pc < end {
// Scope the immutable borrow from decode_next so we can mutate combined_code afterwards
let (opcode, next_pc, imm_start, imm_u32_opt) = {
match decode_next(pc, &combined_code) {
Ok(instr) => {
let opcode = instr.opcode;
let next_pc = instr.next_pc;
let imm_start = instr.pc + 2; // start of immediate payload
let imm_u32_opt = match opcode {
OpCode::PushConst | OpCode::Call | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => {
match instr.imm_u32() {
Ok(v) => Some(v),
Err(_) => None,
}
}
_ => None,
};
(opcode, next_pc, imm_start, imm_u32_opt)
}
Err(e) => {
return Err(LinkError::IncompatibleSymbolSignature(format!(
"Bytecode decode error at pc {}: {:?}",
pc - code_offset, e
)));
}
}
};
match opcode {
OpCode::PushConst => {
let local_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
"Invalid PUSH_CONST immediate at pc {}",
pc - code_offset
)))? as usize;
if let Some(&global_idx) = local_to_global_const.get(local_idx) {
patch_u32_at(&mut combined_code, imm_start, &|_| global_idx);
}
}
OpCode::Call => {
let local_func_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
"Invalid CALL immediate at pc {}",
pc - code_offset
)))?;
// Determine if this CALL site corresponds to an import relocation.
let reloc_rel_pc = (imm_start - code_offset) as u32;
let is_import = module
.imports
.iter()
.any(|imp| imp.relocation_pcs.contains(&reloc_rel_pc));
if !is_import {
let global_func_idx = module_function_offsets[i] + local_func_idx;
patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx);
}
}
// Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs
OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => {
// For branches, immediate must be present and represents rel PC from function start
let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!(
"Invalid branch immediate at pc {}",
pc - code_offset
)))?;
patch_u32_at(&mut combined_code, imm_start, &|cur| cur + (code_offset as u32));
}
_ => {}
}
pc = next_pc;
}
}
// Final Exports map for ProgramImage (String -> func_idx)
// Only including exports from the ROOT project (the last one in build plan usually)
// In PBS v0, exports are name -> func_id.
let mut final_exports = HashMap::new();
if let Some(root_module) = modules.last() {
for (key, meta) in &root_module.exports {
if let Some(local_func_idx) = meta.func_idx {
let global_func_idx = module_function_offsets.last().unwrap() + local_func_idx;
final_exports.insert(format!("{}:{}", key.module_path, key.symbol_name), global_func_idx);
// Also provide short name for root module exports to facilitate entrypoint resolution
if !final_exports.contains_key(&key.symbol_name) {
final_exports.insert(key.symbol_name.clone(), global_func_idx);
}
}
}
}
// v0: Fallback export for entrypoint `frame` (root module)
if !final_exports.iter().any(|(name, _)| name.ends_with(":frame") || name == "frame") {
if let Some(&root_offset) = module_function_offsets.last() {
if let Some((idx, _)) = combined_function_names.iter().find(|(i, name)| *i >= root_offset && name == "frame") {
final_exports.insert("frame".to_string(), *idx);
final_exports.insert("src/main/modules:frame".to_string(), *idx);
}
}
}
// Ajuste final: se os nomes de função no DebugInfo estiverem enriquecidos no formato
// "name@offset+len", alinhar apenas o `code_len` de `combined_functions[idx]` a esses
// valores (os offsets do DebugInfo são locais ao módulo antes do link). Mantemos o
// `code_offset` já realocado durante o PASS 1.
// Track which function metas received a precise code_len from DebugInfo
let mut has_precise_len: Vec<bool> = vec![false; combined_functions.len()];
for (idx, name) in &combined_function_names {
if let Some((base, rest)) = name.split_once('@') {
let mut parts = rest.split('+');
if let (Some(off_str), Some(len_str)) = (parts.next(), parts.next()) {
if let (Ok(_off), Ok(len)) = (off_str.parse::<u32>(), len_str.parse::<u32>()) {
if let Some(meta) = combined_functions.get_mut(*idx as usize) {
let old_off = meta.code_offset;
let old_len = meta.code_len;
meta.code_len = len;
has_precise_len[*idx as usize] = true;
eprintln!(
"[Linker][debug] Align len idx={} name={} -> code_offset {} (kept) | code_len {} -> {}",
idx, base, old_off, old_len, len
);
}
}
}
}
}
// Ensure DebugInfo also contains plain base names alongside enriched names for easy lookup.
// For any entry of form "name@off+len", also add (idx, "name") if missing.
let mut plain_names_to_add: Vec<(u32, String)> = Vec::new();
for (idx, name) in &combined_function_names {
if let Some((base, _)) = name.split_once('@') {
let already_has_plain = combined_function_names.iter().any(|(i, n)| i == idx && n == base);
if !already_has_plain {
plain_names_to_add.push((*idx, base.to_string()));
}
}
}
combined_function_names.extend(plain_names_to_add);
// Recompute code_len ONLY for functions that did NOT receive a precise length from DebugInfo.
// This preserves exact ends emitted by the compiler while still filling lengths for functions
// that lack enriched annotations.
let total_len = combined_code.len();
for i in 0..combined_functions.len() {
if !has_precise_len.get(i).copied().unwrap_or(false) {
let start = combined_functions[i].code_offset as usize;
let end = layout::function_end_from_next(&combined_functions, i, total_len);
combined_functions[i].code_len = end.saturating_sub(start) as u32;
}
}
// Removido padding específico de `frame`; o emissor passou a garantir que o label de término
// esteja no ponto exato do fim do corpo, e, quando necessário, insere NOPs reais antes do fim.
// Garantir export do entry point 'frame' mesmo com nomes enriquecidos no DebugInfo.
if !final_exports.contains_key("frame") {
if let Some((idx, _name)) = combined_function_names.iter().find(|(i, name)| {
let base = name.split('@').next().unwrap_or(name.as_str());
let i_usize = *i as usize;
(base == "frame" || base.ends_with(":frame"))
&& combined_functions.get(i_usize).map(|m| m.param_slots == 0 && m.return_slots == 0).unwrap_or(false)
}) {
final_exports.insert("frame".to_string(), *idx);
final_exports.insert("src/main/modules:frame".to_string(), *idx);
}
}
let combined_debug_info = if combined_pc_to_span.is_empty() && combined_function_names.is_empty() {
None
} else {
// Ensure entry-point name mapping is present for easy lookup in DebugInfo
if let Some(frame_idx) = final_exports.get("frame") {
if !combined_function_names.iter().any(|(i, n)| i == frame_idx && n == "frame") {
combined_function_names.push((*frame_idx, "frame".to_string()));
}
}
Some(DebugInfo {
pc_to_span: combined_pc_to_span,
function_names: combined_function_names,
})
};
Ok(ProgramImage::new(
combined_code,
combined_constants,
combined_functions,
combined_debug_info,
final_exports,
))
}
}
/// Rewrites the little-endian `u32` stored at `pos` in `buf` with `f(current_value)`.
///
/// Returns `LinkError::OutOfBounds(pos, buf.len())` when either the read or the write helper
/// yields `None`.
///
/// NOTE(review): this file also imports `read_u32_le`/`write_u32_le` from
/// `prometeu_bytecode::readwrite`; confirm which module path is the canonical one.
fn patch_u32_at(
    buf: &mut [u8],
    pos: usize,
    f: impl FnOnce(u32) -> u32,
) -> Result<(), LinkError> {
    let previous = match prometeu_bytecode::io::read_u32_le(buf, pos) {
        Some(value) => value,
        None => return Err(LinkError::OutOfBounds(pos, buf.len())),
    };
    let replacement = f(previous);
    match prometeu_bytecode::io::write_u32_le(buf, pos, replacement) {
        Some(_) => Ok(()),
        None => Err(LinkError::OutOfBounds(pos, buf.len())),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::building::output::{ExportKey, ExportMetadata, ImportKey, ImportMetadata};
    use crate::building::plan::BuildTarget;
    use crate::deps::resolver::ProjectKey;
    use crate::semantics::export_surface::ExportSurfaceKind;
    use prometeu_analysis::ids::ProjectId;
    use prometeu_bytecode::opcode::OpCode;
    use prometeu_bytecode::FunctionMeta;
    use std::collections::BTreeMap;

    /// A root module importing `math:add` from a library must have its CALL operand patched
    /// with the library function's global index, leaving the CALL opcode itself intact.
    #[test]
    fn test_link_root_and_lib() {
        let lib_key = ProjectKey { name: "lib".into(), version: "1.0.0".into() };
        let root_key = ProjectKey { name: "root".into(), version: "1.0.0".into() };
        let lib_id = ProjectId(0);
        let root_id = ProjectId(1);

        // Lib module: exports 'add'.
        let mut lib_code = Vec::new();
        lib_code.extend_from_slice(&(OpCode::Add as u16).to_le_bytes());
        lib_code.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes());
        let mut lib_exports = BTreeMap::new();
        lib_exports.insert(
            ExportKey {
                module_path: "math".into(),
                symbol_name: "add".into(),
                kind: ExportSurfaceKind::Service,
            },
            ExportMetadata { func_idx: Some(0), is_host: false, ty: None },
        );
        let lib_module = CompiledModule {
            project_id: lib_id,
            project_key: lib_key.clone(),
            target: BuildTarget::Main,
            exports: lib_exports,
            imports: vec![],
            const_pool: vec![],
            code: lib_code,
            function_metas: vec![FunctionMeta {
                code_offset: 0,
                code_len: 4,
                ..Default::default()
            }],
            debug_info: None,
            symbols: vec![],
        };

        // Root module: calls 'lib::math:add'.
        let mut root_code = Vec::new();
        root_code.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes());
        root_code.extend_from_slice(&10i32.to_le_bytes());
        root_code.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes());
        root_code.extend_from_slice(&20i32.to_le_bytes());
        // Call lib:math:add.
        let call_pc = root_code.len() as u32;
        root_code.extend_from_slice(&(OpCode::Call as u16).to_le_bytes());
        // Non-zero placeholder so the assertion below proves the operand was really patched.
        root_code.extend_from_slice(&u32::MAX.to_le_bytes());
        root_code.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes());
        // The linker writes at the relocation PC itself, so it must address the operand
        // (2 bytes past the opcode) rather than the opcode.
        let call_operand_pc = call_pc + 2;
        let root_imports = vec![ImportMetadata {
            key: ImportKey {
                dep_alias: "mylib".into(),
                module_path: "math".into(),
                symbol_name: "add".into(),
            },
            relocation_pcs: vec![call_operand_pc],
        }];
        let root_module = CompiledModule {
            project_id: root_id,
            project_key: root_key.clone(),
            target: BuildTarget::Main,
            exports: BTreeMap::new(),
            imports: root_imports,
            const_pool: vec![],
            code: root_code,
            function_metas: vec![FunctionMeta {
                code_offset: 0,
                code_len: 20,
                ..Default::default()
            }],
            debug_info: None,
            symbols: vec![],
        };

        let lib_step = BuildStep {
            project_id: lib_id,
            project_key: lib_key.clone(),
            project_dir: "".into(),
            target: BuildTarget::Main,
            sources: vec![],
            deps: BTreeMap::new(),
        };
        let mut root_deps: BTreeMap<String, ProjectId> = BTreeMap::new();
        root_deps.insert("mylib".into(), lib_id);
        let root_step = BuildStep {
            project_id: root_id,
            project_key: root_key.clone(),
            project_dir: "".into(),
            target: BuildTarget::Main,
            sources: vec![],
            deps: root_deps,
        };

        let result = Linker::link(vec![lib_module, root_module], vec![lib_step, root_step]).unwrap();
        assert_eq!(result.functions.len(), 2);

        // lib:add is func 0, root:main is func 1.
        // lib_code length is 4, so root code starts at 4.
        // CALL was at root_code offset 12 -> absolute PC of CALL: 4 + 12 = 16.
        // Its immediate is at 16 + 2 = 18.
        let call_opcode = u16::from_le_bytes(result.rom[16..18].try_into().unwrap());
        assert_eq!(call_opcode, OpCode::Call as u16); // opcode must not be clobbered
        let patched_func_idx = u32::from_le_bytes(result.rom[18..22].try_into().unwrap());
        assert_eq!(patched_func_idx, 0); // Points to lib:add
    }

    /// Equal constants coming from different modules must be stored once in the linked pool.
    #[test]
    fn test_link_const_deduplication() {
        let key = ProjectKey { name: "test".into(), version: "1.0.0".into() };
        let id = ProjectId(0);
        let step = BuildStep {
            project_id: id,
            project_key: key.clone(),
            project_dir: "".into(),
            target: BuildTarget::Main,
            sources: vec![],
            deps: BTreeMap::new(),
        };
        let m1 = CompiledModule {
            project_id: id,
            project_key: key.clone(),
            target: BuildTarget::Main,
            exports: BTreeMap::new(),
            imports: vec![],
            const_pool: vec![ConstantPoolEntry::Int32(42), ConstantPoolEntry::String("hello".into())],
            code: vec![],
            function_metas: vec![],
            debug_info: None,
            symbols: vec![],
        };
        let m2 = CompiledModule {
            project_id: id,
            project_key: key.clone(),
            target: BuildTarget::Main,
            exports: BTreeMap::new(),
            imports: vec![],
            const_pool: vec![ConstantPoolEntry::String("hello".into()), ConstantPoolEntry::Int32(99)],
            code: vec![],
            function_metas: vec![],
            debug_info: None,
            symbols: vec![],
        };

        let result = Linker::link(vec![m1, m2], vec![step.clone(), step]).unwrap();

        // Constants should be: 42, "hello", 99.
        assert_eq!(result.constant_pool.len(), 3);
        assert_eq!(result.constant_pool[0], Value::Int32(42));
        assert_eq!(result.constant_pool[1], Value::String("hello".into()));
        assert_eq!(result.constant_pool[2], Value::Int32(99));
    }

    /// Branch immediates of a module placed at a non-zero code offset must be shifted by that
    /// offset.
    #[test]
    fn test_jump_relocation_across_modules() {
        // Module 1: small stub to create a non-zero code offset for module 2.
        let key1 = ProjectKey { name: "m1".into(), version: "1.0.0".into() };
        let id1 = ProjectId(0);
        let step1 = BuildStep {
            project_id: id1,
            project_key: key1.clone(),
            project_dir: "".into(),
            target: BuildTarget::Main,
            sources: vec![],
            deps: BTreeMap::new(),
        };
        let mut code1 = Vec::new();
        code1.extend_from_slice(&(OpCode::Add as u16).to_le_bytes());
        code1.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes());
        let m1 = CompiledModule {
            project_id: id1,
            project_key: key1.clone(),
            target: BuildTarget::Main,
            exports: BTreeMap::new(),
            imports: vec![],
            const_pool: vec![],
            code: code1.clone(),
            function_metas: vec![FunctionMeta { code_offset: 0, code_len: code1.len() as u32, ..Default::default() }],
            debug_info: None,
            symbols: vec![],
        };

        // Module 2: contains an unconditional JMP and a conditional JMP_IF_TRUE with local
        // targets.
        let key2 = ProjectKey { name: "m2".into(), version: "1.0.0".into() };
        let id2 = ProjectId(1);
        let step2 = BuildStep {
            project_id: id2,
            project_key: key2.clone(),
            project_dir: "".into(),
            target: BuildTarget::Main,
            sources: vec![],
            deps: BTreeMap::new(),
        };
        let mut code2 = Vec::new();
        // Unconditional JMP to local target 0 (module-local start).
        let jmp_pc = code2.len() as u32; // where the opcode will be placed
        code2.extend_from_slice(&(OpCode::Jmp as u16).to_le_bytes());
        code2.extend_from_slice(&0u32.to_le_bytes());
        // PushBool true; then conditional jump to local target 0.
        code2.extend_from_slice(&(OpCode::PushBool as u16).to_le_bytes());
        code2.push(1u8);
        let cjmp_pc = code2.len() as u32;
        code2.extend_from_slice(&(OpCode::JmpIfTrue as u16).to_le_bytes());
        code2.extend_from_slice(&0u32.to_le_bytes());
        // End with HALT so the VM would stop if executed.
        code2.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes());
        let m2 = CompiledModule {
            project_id: id2,
            project_key: key2.clone(),
            target: BuildTarget::Main,
            exports: BTreeMap::new(),
            imports: vec![],
            const_pool: vec![],
            code: code2.clone(),
            function_metas: vec![FunctionMeta { code_offset: 0, code_len: code2.len() as u32, ..Default::default() }],
            debug_info: None,
            symbols: vec![],
        };

        // Link with order [m1, m2].
        let result = Linker::link(vec![m1, m2], vec![step1, step2]).unwrap();

        // Module 2's code starts after module 1's code.
        let module2_offset = code1.len() as u32;

        // The JMP immediate must equal original_target (0) + module2_offset.
        let jmp_abs_pc = module2_offset as usize + jmp_pc as usize;
        let jmp_imm_off = jmp_abs_pc + 2; // skip opcode
        let jmp_patched = u32::from_le_bytes(result.rom[jmp_imm_off..jmp_imm_off + 4].try_into().unwrap());
        assert_eq!(jmp_patched, module2_offset);

        // The conditional JMP immediate must be relocated the same way.
        let cjmp_abs_pc = module2_offset as usize + cjmp_pc as usize;
        let cjmp_imm_off = cjmp_abs_pc + 2;
        let cjmp_patched = u32::from_le_bytes(result.rom[cjmp_imm_off..cjmp_imm_off + 4].try_into().unwrap());
        assert_eq!(cjmp_patched, module2_offset);
    }
}