use crate::building::output::CompiledModule; use crate::building::plan::BuildStep; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::layout; use prometeu_bytecode::decoder::decode_next; use prometeu_bytecode::{ConstantPoolEntry, DebugInfo}; use std::collections::HashMap; use prometeu_abi::virtual_machine::{ProgramImage, Value}; use prometeu_analysis::ids::ProjectId; use prometeu_bytecode::readwrite::{read_u32_le, write_u32_le}; #[derive(Debug, PartialEq, Eq, Clone)] pub enum LinkError { OutOfBounds(usize, usize), UnresolvedSymbol(String), DuplicateExport(String), IncompatibleSymbolSignature(String), } impl std::fmt::Display for LinkError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { LinkError::OutOfBounds(pos, len) => write!(f, "Out of bounds: pos={} len={}", pos, len), LinkError::UnresolvedSymbol(s) => write!(f, "Unresolved symbol: {}", s), LinkError::DuplicateExport(s) => write!(f, "Duplicate export: {}", s), LinkError::IncompatibleSymbolSignature(s) => write!(f, "Incompatible symbol signature: {}", s), } } } impl std::error::Error for LinkError {} pub struct Linker; #[derive(Clone, Debug, PartialEq, Eq, Hash)] struct ConstantPoolBitKey(Vec); impl ConstantPoolBitKey { fn from_entry(entry: &ConstantPoolEntry) -> Self { match entry { ConstantPoolEntry::Null => Self(vec![0]), ConstantPoolEntry::Int64(v) => { let mut b = vec![1]; b.extend_from_slice(&v.to_le_bytes()); Self(b) } ConstantPoolEntry::Float64(v) => { let mut b = vec![2]; b.extend_from_slice(&v.to_bits().to_le_bytes()); Self(b) } ConstantPoolEntry::Boolean(v) => { Self(vec![3, if *v { 1 } else { 0 }]) } ConstantPoolEntry::String(v) => { let mut b = vec![4]; b.extend_from_slice(v.as_bytes()); Self(b) } ConstantPoolEntry::Int32(v) => { let mut b = vec![5]; b.extend_from_slice(&v.to_le_bytes()); Self(b) } } } } impl Linker { pub fn link(modules: Vec, steps: Vec) -> Result { if modules.len() != steps.len() { return Err(LinkError::IncompatibleSymbolSignature(format!("Module count ({}) does not match build steps count ({})", modules.len(), steps.len()))); } let mut combined_code = Vec::new(); let mut combined_functions = Vec::new(); let mut combined_constants = Vec::new(); let mut constant_map: HashMap = HashMap::new(); // Debug info merging let mut combined_pc_to_span = Vec::new(); let mut combined_function_names = Vec::new(); // 1. DebugSymbol resolution map: (ProjectKey, module_path, symbol_name) -> func_idx in combined_functions let mut global_symbols = HashMap::new(); let mut module_code_offsets = Vec::with_capacity(modules.len()); let mut module_function_offsets = Vec::with_capacity(modules.len()); // Map ProjectKey to index let _project_to_idx: HashMap<_, _> = modules.iter().enumerate().map(|(i, m)| (m.project_id.clone(), i)).collect(); // PASS 1: Collect exports and calculate offsets for (_i, module) in modules.iter().enumerate() { let code_offset = combined_code.len() as u32; let function_offset = combined_functions.len() as u32; module_code_offsets.push(code_offset); module_function_offsets.push(function_offset); for (key, meta) in &module.exports { if let Some(local_func_idx) = meta.func_idx { let global_func_idx = function_offset + local_func_idx; // Note: Use a tuple as key for clarity let symbol_id = (module.project_id.clone(), key.module_path.clone(), key.symbol_name.clone()); if global_symbols.contains_key(&symbol_id) { return Err(LinkError::DuplicateExport(format!("Project {:?} export {}:{} already defined", symbol_id.0, symbol_id.1, symbol_id.2))); } global_symbols.insert(symbol_id, global_func_idx); } } combined_code.extend_from_slice(&module.code); for func in &module.function_metas { let mut relocated = func.clone(); relocated.code_offset += code_offset; combined_functions.push(relocated); } if let Some(debug) = &module.debug_info { for (pc, span) in &debug.pc_to_span { combined_pc_to_span.push((code_offset + pc, span.clone())); } for (func_idx, name) in &debug.function_names { combined_function_names.push((function_offset + func_idx, name.clone())); } } } // PASS 2: Relocate constants and patch CALLs for (i, module) in modules.iter().enumerate() { let step = &steps[i]; let code_offset = module_code_offsets[i] as usize; // Map local constant indices to global constant indices let mut local_to_global_const = Vec::with_capacity(module.const_pool.len()); for entry in &module.const_pool { let bit_key = ConstantPoolBitKey::from_entry(entry); if let Some(&global_idx) = constant_map.get(&bit_key) { local_to_global_const.push(global_idx); } else { let global_idx = combined_constants.len() as u32; combined_constants.push(match entry { ConstantPoolEntry::Null => Value::Null, ConstantPoolEntry::Int64(v) => Value::Int64(*v), ConstantPoolEntry::Float64(v) => Value::Float(*v), ConstantPoolEntry::Boolean(v) => Value::Boolean(*v), ConstantPoolEntry::String(v) => Value::String(v.clone()), ConstantPoolEntry::Int32(v) => Value::Int32(*v), }); constant_map.insert(bit_key, global_idx); local_to_global_const.push(global_idx); } } // Patch imports for import in &module.imports { // Resolve the dependency project id. If alias is missing/self, try all deps as fallback. let mut candidate_projects: Vec<&ProjectId> = Vec::new(); if import.key.dep_alias == "self" || import.key.dep_alias.is_empty() { candidate_projects.push(&module.project_id); for (_alias, pid) in &step.deps { candidate_projects.push(pid); } } else { let pid = step.deps.get(&import.key.dep_alias) .ok_or_else(|| LinkError::UnresolvedSymbol(format!("Dependency alias '{}' not found in project {:?}", import.key.dep_alias, module.project_id)))?; candidate_projects.push(pid); } let mut resolved_idx: Option = None; for pid in candidate_projects { let pid_val: ProjectId = (*pid).clone(); let key = (pid_val, import.key.module_path.clone(), import.key.symbol_name.clone()); if let Some(&idx) = global_symbols.get(&key) { resolved_idx = Some(idx); break; } } let target_func_idx = resolved_idx.ok_or_else(|| { LinkError::UnresolvedSymbol(format!( "DebugSymbol '{}:{}' not found in any candidate project (self={:?}, deps={:?})", import.key.module_path, import.key.symbol_name, module.project_id, step.deps )) })?; for &reloc_pc in &import.relocation_pcs { // `reloc_pc` aponta para o INÍCIO do operando (após os 2 bytes do opcode), // conforme `assemble_with_unresolved` grava `pc` antes de escrever o U32. // Portanto, devemos escrever exatamente em `absolute_pc`. let absolute_pc = code_offset + reloc_pc as usize; if absolute_pc + 4 <= combined_code.len() { combined_code[absolute_pc..absolute_pc+4] .copy_from_slice(&target_func_idx.to_le_bytes()); } } } let mut pc = code_offset; let end = code_offset + module.code.len(); while pc < end { // Scope the immutable borrow from decode_next so we can mutate combined_code afterwards let (opcode, next_pc, imm_start, imm_u32_opt) = { match decode_next(pc, &combined_code) { Ok(instr) => { let opcode = instr.opcode; let next_pc = instr.next_pc; let imm_start = instr.pc + 2; // start of immediate payload let imm_u32_opt = match opcode { OpCode::PushConst | OpCode::Call | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { match instr.imm_u32() { Ok(v) => Some(v), Err(_) => None, } } _ => None, }; (opcode, next_pc, imm_start, imm_u32_opt) } Err(e) => { return Err(LinkError::IncompatibleSymbolSignature(format!( "Bytecode decode error at pc {}: {:?}", pc - code_offset, e ))); } } }; match opcode { OpCode::PushConst => { let local_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( "Invalid PUSH_CONST immediate at pc {}", pc - code_offset )))? as usize; if let Some(&global_idx) = local_to_global_const.get(local_idx) { patch_u32_at(&mut combined_code, imm_start, &|_| global_idx); } } OpCode::Call => { let local_func_idx = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( "Invalid CALL immediate at pc {}", pc - code_offset )))?; // Determine if this CALL site corresponds to an import relocation. let reloc_rel_pc = (imm_start - code_offset) as u32; let is_import = module .imports .iter() .any(|imp| imp.relocation_pcs.contains(&reloc_rel_pc)); if !is_import { let global_func_idx = module_function_offsets[i] + local_func_idx; patch_u32_at(&mut combined_code, imm_start, &|_| global_func_idx); } } // Relocate intra-function control-flow immediates by module code offset to preserve absolute PCs OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue => { // For branches, immediate must be present and represents rel PC from function start let _ = imm_u32_opt.ok_or_else(|| LinkError::IncompatibleSymbolSignature(format!( "Invalid branch immediate at pc {}", pc - code_offset )))?; patch_u32_at(&mut combined_code, imm_start, &|cur| cur + (code_offset as u32)); } _ => {} } pc = next_pc; } } // Final Exports map for ProgramImage (String -> func_idx) // Only including exports from the ROOT project (the last one in build plan usually) // In PBS v0, exports are name -> func_id. let mut final_exports = HashMap::new(); if let Some(root_module) = modules.last() { for (key, meta) in &root_module.exports { if let Some(local_func_idx) = meta.func_idx { let global_func_idx = module_function_offsets.last().unwrap() + local_func_idx; final_exports.insert(format!("{}:{}", key.module_path, key.symbol_name), global_func_idx); // Also provide short name for root module exports to facilitate entrypoint resolution if !final_exports.contains_key(&key.symbol_name) { final_exports.insert(key.symbol_name.clone(), global_func_idx); } } } } // v0: Fallback export for entrypoint `frame` (root module) if !final_exports.iter().any(|(name, _)| name.ends_with(":frame") || name == "frame") { if let Some(&root_offset) = module_function_offsets.last() { if let Some((idx, _)) = combined_function_names.iter().find(|(i, name)| *i >= root_offset && name == "frame") { final_exports.insert("frame".to_string(), *idx); final_exports.insert("src/main/modules:frame".to_string(), *idx); } } } // Ajuste final: se os nomes de função no DebugInfo estiverem enriquecidos no formato // "name@offset+len", alinhar apenas o `code_len` de `combined_functions[idx]` a esses // valores (os offsets do DebugInfo são locais ao módulo antes do link). Mantemos o // `code_offset` já realocado durante o PASS 1. // Track which function metas received a precise code_len from DebugInfo let mut has_precise_len: Vec = vec![false; combined_functions.len()]; for (idx, name) in &combined_function_names { if let Some((base, rest)) = name.split_once('@') { let mut parts = rest.split('+'); if let (Some(off_str), Some(len_str)) = (parts.next(), parts.next()) { if let (Ok(_off), Ok(len)) = (off_str.parse::(), len_str.parse::()) { if let Some(meta) = combined_functions.get_mut(*idx as usize) { let old_off = meta.code_offset; let old_len = meta.code_len; meta.code_len = len; has_precise_len[*idx as usize] = true; eprintln!( "[Linker][debug] Align len idx={} name={} -> code_offset {} (kept) | code_len {} -> {}", idx, base, old_off, old_len, len ); } } } } } // Ensure DebugInfo also contains plain base names alongside enriched names for easy lookup. // For any entry of form "name@off+len", also add (idx, "name") if missing. let mut plain_names_to_add: Vec<(u32, String)> = Vec::new(); for (idx, name) in &combined_function_names { if let Some((base, _)) = name.split_once('@') { let already_has_plain = combined_function_names.iter().any(|(i, n)| i == idx && n == base); if !already_has_plain { plain_names_to_add.push((*idx, base.to_string())); } } } combined_function_names.extend(plain_names_to_add); // Recompute code_len ONLY for functions that did NOT receive a precise length from DebugInfo. // This preserves exact ends emitted by the compiler while still filling lengths for functions // that lack enriched annotations. let total_len = combined_code.len(); for i in 0..combined_functions.len() { if !has_precise_len.get(i).copied().unwrap_or(false) { let start = combined_functions[i].code_offset as usize; let end = layout::function_end_from_next(&combined_functions, i, total_len); combined_functions[i].code_len = end.saturating_sub(start) as u32; } } // Removido padding específico de `frame`; o emissor passou a garantir que o label de término // esteja no ponto exato do fim do corpo, e, quando necessário, insere NOPs reais antes do fim. // Garantir export do entry point 'frame' mesmo com nomes enriquecidos no DebugInfo. if !final_exports.contains_key("frame") { if let Some((idx, _name)) = combined_function_names.iter().find(|(i, name)| { let base = name.split('@').next().unwrap_or(name.as_str()); let i_usize = *i as usize; (base == "frame" || base.ends_with(":frame")) && combined_functions.get(i_usize).map(|m| m.param_slots == 0 && m.return_slots == 0).unwrap_or(false) }) { final_exports.insert("frame".to_string(), *idx); final_exports.insert("src/main/modules:frame".to_string(), *idx); } } let combined_debug_info = if combined_pc_to_span.is_empty() && combined_function_names.is_empty() { None } else { // Ensure entry-point name mapping is present for easy lookup in DebugInfo if let Some(frame_idx) = final_exports.get("frame") { if !combined_function_names.iter().any(|(i, n)| i == frame_idx && n == "frame") { combined_function_names.push((*frame_idx, "frame".to_string())); } } Some(DebugInfo { pc_to_span: combined_pc_to_span, function_names: combined_function_names, }) }; Ok(ProgramImage::new( combined_code, combined_constants, combined_functions, combined_debug_info, final_exports, )) } } fn patch_u32_at( buf: &mut [u8], pos: usize, f: impl FnOnce(u32) -> u32, ) -> Result<(), LinkError> { let current = prometeu_bytecode::io::read_u32_le(buf, pos).ok_or(LinkError::OutOfBounds(pos, buf.len()))?; let next = f(current); prometeu_bytecode::io::write_u32_le(buf, pos, next).ok_or(LinkError::OutOfBounds(pos, buf.len()))?; Ok(()) } #[cfg(test)] mod tests { use super::*; use crate::building::output::{ExportKey, ExportMetadata, ImportKey, ImportMetadata}; use crate::building::plan::BuildTarget; use crate::deps::resolver::ProjectKey; use crate::semantics::export_surface::ExportSurfaceKind; use prometeu_analysis::ids::ProjectId; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::FunctionMeta; use std::collections::BTreeMap; #[test] fn test_link_root_and_lib() { let lib_key = ProjectKey { name: "lib".into(), version: "1.0.0".into() }; let root_key = ProjectKey { name: "root".into(), version: "1.0.0".into() }; let lib_id = ProjectId(0); let root_id = ProjectId(1); // Lib module: exports 'add' let mut lib_code = Vec::new(); lib_code.extend_from_slice(&(OpCode::Add as u16).to_le_bytes()); lib_code.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); let mut lib_exports = BTreeMap::new(); lib_exports.insert(ExportKey { module_path: "math".into(), symbol_name: "add".into(), kind: ExportSurfaceKind::Service, }, ExportMetadata { func_idx: Some(0), is_host: false, ty: None }); let lib_module = CompiledModule { project_id: lib_id, project_key: lib_key.clone(), target: BuildTarget::Main, exports: lib_exports, imports: vec![], const_pool: vec![], code: lib_code, function_metas: vec![FunctionMeta { code_offset: 0, code_len: 4, ..Default::default() }], debug_info: None, symbols: vec![], }; // Root module: calls 'lib::math:add' let mut root_code = Vec::new(); root_code.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); root_code.extend_from_slice(&10i32.to_le_bytes()); root_code.extend_from_slice(&(OpCode::PushI32 as u16).to_le_bytes()); root_code.extend_from_slice(&20i32.to_le_bytes()); // Call lib:math:add let call_pc = root_code.len() as u32; root_code.extend_from_slice(&(OpCode::Call as u16).to_le_bytes()); root_code.extend_from_slice(&0u32.to_le_bytes()); // placeholder root_code.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); let root_imports = vec![ImportMetadata { key: ImportKey { dep_alias: "mylib".into(), module_path: "math".into(), symbol_name: "add".into(), }, relocation_pcs: vec![call_pc], }]; let root_module = CompiledModule { project_id: root_id, project_key: root_key.clone(), target: BuildTarget::Main, exports: BTreeMap::new(), imports: root_imports, const_pool: vec![], code: root_code, function_metas: vec![FunctionMeta { code_offset: 0, code_len: 20, ..Default::default() }], debug_info: None, symbols: vec![], }; let lib_step = BuildStep { project_id: lib_id, project_key: lib_key.clone(), project_dir: "".into(), target: BuildTarget::Main, sources: vec![], deps: BTreeMap::new(), }; let mut root_deps: BTreeMap = BTreeMap::new(); root_deps.insert("mylib".into(), lib_id); let root_step = BuildStep { project_id: root_id, project_key: root_key.clone(), project_dir: "".into(), target: BuildTarget::Main, sources: vec![], deps: root_deps, }; let result = Linker::link(vec![lib_module, root_module], vec![lib_step, root_step]).unwrap(); assert_eq!(result.functions.len(), 2); // lib:add is func 0 // root:main is func 1 // lib_code length is 4. // Root code starts at 4. // CALL was at root_code offset 12. // Absolute PC of CALL: 4 + 12 = 16. // Immediate is at 16 + 2 = 18. let patched_func_idx = u32::from_le_bytes(result.rom[18..22].try_into().unwrap()); assert_eq!(patched_func_idx, 0); // Points to lib:add } #[test] fn test_link_const_deduplication() { let key = ProjectKey { name: "test".into(), version: "1.0.0".into() }; let id = ProjectId(0); let step = BuildStep { project_id: id, project_key: key.clone(), project_dir: "".into(), target: BuildTarget::Main, sources: vec![], deps: BTreeMap::new() }; let m1 = CompiledModule { project_id: id, project_key: key.clone(), target: BuildTarget::Main, exports: BTreeMap::new(), imports: vec![], const_pool: vec![ConstantPoolEntry::Int32(42), ConstantPoolEntry::String("hello".into())], code: vec![], function_metas: vec![], debug_info: None, symbols: vec![], }; let m2 = CompiledModule { project_id: id, project_key: key.clone(), target: BuildTarget::Main, exports: BTreeMap::new(), imports: vec![], const_pool: vec![ConstantPoolEntry::String("hello".into()), ConstantPoolEntry::Int32(99)], code: vec![], function_metas: vec![], debug_info: None, symbols: vec![], }; let result = Linker::link(vec![m1, m2], vec![step.clone(), step]).unwrap(); // Constants should be: 42, "hello", 99 assert_eq!(result.constant_pool.len(), 3); assert_eq!(result.constant_pool[0], Value::Int32(42)); assert_eq!(result.constant_pool[1], Value::String("hello".into())); assert_eq!(result.constant_pool[2], Value::Int32(99)); } #[test] fn test_jump_relocation_across_modules() { // Module 1: small stub to create a non-zero code offset for module 2 let key1 = ProjectKey { name: "m1".into(), version: "1.0.0".into() }; let id1 = ProjectId(0); let step1 = BuildStep { project_id: id1, project_key: key1.clone(), project_dir: "".into(), target: BuildTarget::Main, sources: vec![], deps: BTreeMap::new() }; let mut code1 = Vec::new(); code1.extend_from_slice(&(OpCode::Add as u16).to_le_bytes()); code1.extend_from_slice(&(OpCode::Ret as u16).to_le_bytes()); let m1 = CompiledModule { project_id: id1, project_key: key1.clone(), target: BuildTarget::Main, exports: BTreeMap::new(), imports: vec![], const_pool: vec![], code: code1.clone(), function_metas: vec![FunctionMeta { code_offset: 0, code_len: code1.len() as u32, ..Default::default() }], debug_info: None, symbols: vec![], }; // Module 2: contains an unconditional JMP and a conditional JMP_IF_TRUE with local targets let key2 = ProjectKey { name: "m2".into(), version: "1.0.0".into() }; let id2 = ProjectId(1); let step2 = BuildStep { project_id: id2, project_key: key2.clone(), project_dir: "".into(), target: BuildTarget::Main, sources: vec![], deps: BTreeMap::new() }; let mut code2 = Vec::new(); // Unconditional JMP to local target 0 (module-local start) let jmp_pc = code2.len() as u32; // where opcode will be placed code2.extend_from_slice(&(OpCode::Jmp as u16).to_le_bytes()); code2.extend_from_slice(&0u32.to_le_bytes()); // PushBool true; then conditional jump to local target 0 code2.extend_from_slice(&(OpCode::PushBool as u16).to_le_bytes()); code2.push(1u8); let cjmp_pc = code2.len() as u32; code2.extend_from_slice(&(OpCode::JmpIfTrue as u16).to_le_bytes()); code2.extend_from_slice(&0u32.to_le_bytes()); // End with HALT so VM would stop if executed code2.extend_from_slice(&(OpCode::Halt as u16).to_le_bytes()); let m2 = CompiledModule { project_id: id2, project_key: key2.clone(), target: BuildTarget::Main, exports: BTreeMap::new(), imports: vec![], const_pool: vec![], code: code2.clone(), function_metas: vec![FunctionMeta { code_offset: 0, code_len: code2.len() as u32, ..Default::default() }], debug_info: None, symbols: vec![], }; // Link with order [m1, m2] let result = Linker::link(vec![m1, m2], vec![step1, step2]).unwrap(); // Module 2's code starts after module 1's code let module2_offset = code1.len() as u32; // Verify that the JMP immediate equals original_target (0) + module2_offset let jmp_abs_pc = module2_offset as usize + jmp_pc as usize; let jmp_imm_off = jmp_abs_pc + 2; // skip opcode let jmp_patched = u32::from_le_bytes(result.rom[jmp_imm_off..jmp_imm_off+4].try_into().unwrap()); assert_eq!(jmp_patched, module2_offset); // Verify that the conditional JMP immediate was relocated similarly let cjmp_abs_pc = module2_offset as usize + cjmp_pc as usize; let cjmp_imm_off = cjmp_abs_pc + 2; let cjmp_patched = u32::from_le_bytes(result.rom[cjmp_imm_off..cjmp_imm_off+4].try_into().unwrap()); assert_eq!(cjmp_patched, module2_offset); } }