//! # Bytecode Emitter //! //! This module is responsible for the final stage of the compilation process: //! converting the Intermediate Representation (IR) into the binary Prometeu ByteCode (PBC) format. //! //! It performs two main tasks: //! 1. **Instruction Lowering**: Translates `ir_vm::Instruction` into `prometeu_bytecode::asm::Asm` ops. //! 2. **DebugSymbol Mapping**: Associates bytecode offsets (Program Counter) with source code locations. use crate::ir_core::ConstantValue; use crate::ir_vm; use crate::ir_vm::instr::InstrKind; use anyhow::{anyhow, Result}; use prometeu_bytecode::abi::SourceSpan; use prometeu_bytecode::asm::{update_pc_by_operand, Asm, Operand}; use prometeu_bytecode::opcode::OpCode; use prometeu_bytecode::{BytecodeModule, ConstantPoolEntry, DebugInfo, FunctionMeta}; /// The final output of the code generation phase. pub struct EmitResult { /// The serialized binary data of the PBC file. pub rom: Vec, } pub struct EmitFragments { pub const_pool: Vec, pub functions: Vec, pub code: Vec, pub debug_info: Option, pub unresolved_labels: std::collections::HashMap>, } /// Entry point for emitting a bytecode module from the IR. pub fn emit_module(module: &ir_vm::Module) -> Result { let fragments = emit_fragments(module)?; let exports: Vec<_> = module.functions.iter().enumerate().map(|(i, f)| { prometeu_bytecode::Export { symbol: f.name.clone(), func_idx: i as u32, } }).collect(); let bytecode_module = BytecodeModule { version: 0, const_pool: fragments.const_pool, functions: fragments.functions, code: fragments.code, debug_info: fragments.debug_info, exports, }; Ok(EmitResult { rom: bytecode_module.serialize(), }) } pub fn emit_fragments(module: &ir_vm::Module) -> Result { let mut emitter = BytecodeEmitter::new(); let mut mapped_const_ids = Vec::with_capacity(module.const_pool.constants.len()); for val in &module.const_pool.constants { mapped_const_ids.push(emitter.add_ir_constant(val)); } let mut asm_instrs = Vec::new(); let mut ir_instr_map = Vec::new(); let function_ranges = emitter.lower_instrs(module, &mut asm_instrs, &mut ir_instr_map, &mapped_const_ids)?; let pcs = BytecodeEmitter::calculate_pcs(&asm_instrs); let assemble_res = prometeu_bytecode::asm::assemble_with_unresolved(&asm_instrs).map_err(|e| anyhow!(e))?; let bytecode = assemble_res.code; let mut functions = Vec::new(); let mut function_names = Vec::new(); for (i, function) in module.functions.iter().enumerate() { let (start_idx, end_idx) = function_ranges[i]; let start_pc = pcs[start_idx]; let end_pc = if end_idx < pcs.len() { pcs[end_idx] } else { bytecode.len() as u32 }; functions.push(FunctionMeta { code_offset: start_pc, code_len: end_pc - start_pc, param_slots: function.param_slots, local_slots: function.local_slots, return_slots: function.return_slots, max_stack_slots: 0, // Will be filled by verifier }); function_names.push((i as u32, function.name.clone())); } let mut pc_to_span = Vec::new(); for (i, instr_opt) in ir_instr_map.iter().enumerate() { let current_pc = pcs[i]; if let Some(instr) = instr_opt { if let Some(span) = &instr.span { pc_to_span.push((current_pc, SourceSpan { file_id: span.file_id as u32, start: span.start, end: span.end, })); } } } pc_to_span.sort_by_key(|(pc, _)| *pc); pc_to_span.dedup_by_key(|(pc, _)| *pc); Ok(EmitFragments { const_pool: emitter.constant_pool, functions, code: bytecode, debug_info: Some(DebugInfo { pc_to_span, function_names, }), unresolved_labels: assemble_res.unresolved_labels, }) } /// Internal helper for managing the bytecode emission state. struct BytecodeEmitter { /// Stores constant values (like strings) that are referenced by instructions. constant_pool: Vec, } impl BytecodeEmitter { fn new() -> Self { Self { // Index 0 is traditionally reserved for Null in many VMs constant_pool: vec![ConstantPoolEntry::Null], } } /// Adds a value to the constant pool if it doesn't exist, returning its unique index. fn add_constant(&mut self, entry: ConstantPoolEntry) -> u32 { if let Some(pos) = self.constant_pool.iter().position(|e| e == &entry) { pos as u32 } else { let id = self.constant_pool.len() as u32; self.constant_pool.push(entry); id } } fn add_ir_constant(&mut self, val: &ConstantValue) -> u32 { let entry = match val { ConstantValue::Int(v) => ConstantPoolEntry::Int64(*v), ConstantValue::Float(v) => ConstantPoolEntry::Float64(*v), ConstantValue::String(s) => ConstantPoolEntry::String(s.clone()), }; self.add_constant(entry) } fn lower_instrs<'b>( &mut self, module: &'b ir_vm::Module, asm_instrs: &mut Vec, ir_instr_map: &mut Vec>, mapped_const_ids: &[u32] ) -> Result> { let mut func_names = std::collections::HashMap::new(); for func in &module.functions { func_names.insert(func.id, func.name.clone()); } let mut ranges = Vec::new(); for function in &module.functions { let start_idx = asm_instrs.len(); // Each function starts with a label for its entry point. asm_instrs.push(Asm::Label(function.name.clone())); ir_instr_map.push(None); for instr in &function.body { let op_start_idx = asm_instrs.len(); // Translate each IR instruction to its equivalent Bytecode OpCode. match &instr.kind { InstrKind::Nop => asm_instrs.push(Asm::Op(OpCode::Nop, vec![])), InstrKind::Halt => asm_instrs.push(Asm::Op(OpCode::Halt, vec![])), InstrKind::PushConst(id) => { let mapped_id = mapped_const_ids[id.0 as usize]; asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(mapped_id)])); } InstrKind::PushBounded(val) => { asm_instrs.push(Asm::Op(OpCode::PushBounded, vec![Operand::U32(*val)])); } InstrKind::PushBool(v) => { asm_instrs.push(Asm::Op(OpCode::PushBool, vec![Operand::Bool(*v)])); } InstrKind::PushNull => { asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(0)])); } InstrKind::Pop => asm_instrs.push(Asm::Op(OpCode::Pop, vec![])), InstrKind::Dup => asm_instrs.push(Asm::Op(OpCode::Dup, vec![])), InstrKind::Swap => asm_instrs.push(Asm::Op(OpCode::Swap, vec![])), InstrKind::Add => asm_instrs.push(Asm::Op(OpCode::Add, vec![])), InstrKind::Sub => asm_instrs.push(Asm::Op(OpCode::Sub, vec![])), InstrKind::Mul => asm_instrs.push(Asm::Op(OpCode::Mul, vec![])), InstrKind::Div => asm_instrs.push(Asm::Op(OpCode::Div, vec![])), InstrKind::Neg => asm_instrs.push(Asm::Op(OpCode::Neg, vec![])), InstrKind::Eq => asm_instrs.push(Asm::Op(OpCode::Eq, vec![])), InstrKind::Neq => asm_instrs.push(Asm::Op(OpCode::Neq, vec![])), InstrKind::Lt => asm_instrs.push(Asm::Op(OpCode::Lt, vec![])), InstrKind::Gt => asm_instrs.push(Asm::Op(OpCode::Gt, vec![])), InstrKind::Lte => asm_instrs.push(Asm::Op(OpCode::Lte, vec![])), InstrKind::Gte => asm_instrs.push(Asm::Op(OpCode::Gte, vec![])), InstrKind::And => asm_instrs.push(Asm::Op(OpCode::And, vec![])), InstrKind::Or => asm_instrs.push(Asm::Op(OpCode::Or, vec![])), InstrKind::Not => asm_instrs.push(Asm::Op(OpCode::Not, vec![])), InstrKind::BitAnd => asm_instrs.push(Asm::Op(OpCode::BitAnd, vec![])), InstrKind::BitOr => asm_instrs.push(Asm::Op(OpCode::BitOr, vec![])), InstrKind::BitXor => asm_instrs.push(Asm::Op(OpCode::BitXor, vec![])), InstrKind::Shl => asm_instrs.push(Asm::Op(OpCode::Shl, vec![])), InstrKind::Shr => asm_instrs.push(Asm::Op(OpCode::Shr, vec![])), InstrKind::LocalLoad { slot } => { asm_instrs.push(Asm::Op(OpCode::GetLocal, vec![Operand::U32(*slot)])); } InstrKind::LocalStore { slot } => { asm_instrs.push(Asm::Op(OpCode::SetLocal, vec![Operand::U32(*slot)])); } InstrKind::GetGlobal(slot) => { asm_instrs.push(Asm::Op(OpCode::GetGlobal, vec![Operand::U32(*slot)])); } InstrKind::SetGlobal(slot) => { asm_instrs.push(Asm::Op(OpCode::SetGlobal, vec![Operand::U32(*slot)])); } InstrKind::Jmp(label) => { asm_instrs.push(Asm::Op(OpCode::Jmp, vec![Operand::RelLabel(label.0.clone(), function.name.clone())])); } InstrKind::JmpIfFalse(label) => { asm_instrs.push(Asm::Op(OpCode::JmpIfFalse, vec![Operand::RelLabel(label.0.clone(), function.name.clone())])); } InstrKind::Label(label) => { asm_instrs.push(Asm::Label(label.0.clone())); } InstrKind::Call { func_id, .. } => { let name = func_names.get(func_id).ok_or_else(|| anyhow!("Undefined function ID: {:?}", func_id))?; asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(name.clone())])); } InstrKind::ImportCall { dep_alias, module_path, symbol_name, .. } => { let label = format!("@{}::{}:{}", dep_alias, module_path, symbol_name); asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(label)])); } InstrKind::Ret => asm_instrs.push(Asm::Op(OpCode::Ret, vec![])), InstrKind::Syscall(id) => { asm_instrs.push(Asm::Op(OpCode::Syscall, vec![Operand::U32(*id)])); } InstrKind::FrameSync => asm_instrs.push(Asm::Op(OpCode::FrameSync, vec![])), InstrKind::Alloc { type_id, slots } => { asm_instrs.push(Asm::Op(OpCode::Alloc, vec![Operand::U32(type_id.0), Operand::U32(*slots)])); } InstrKind::GateLoad { offset } => { asm_instrs.push(Asm::Op(OpCode::GateLoad, vec![Operand::U32(*offset)])); } InstrKind::GateStore { offset } => { asm_instrs.push(Asm::Op(OpCode::GateStore, vec![Operand::U32(*offset)])); } InstrKind::GateBeginPeek => asm_instrs.push(Asm::Op(OpCode::GateBeginPeek, vec![])), InstrKind::GateEndPeek => asm_instrs.push(Asm::Op(OpCode::GateEndPeek, vec![])), InstrKind::GateBeginBorrow => asm_instrs.push(Asm::Op(OpCode::GateBeginBorrow, vec![])), InstrKind::GateEndBorrow => asm_instrs.push(Asm::Op(OpCode::GateEndBorrow, vec![])), InstrKind::GateBeginMutate => asm_instrs.push(Asm::Op(OpCode::GateBeginMutate, vec![])), InstrKind::GateEndMutate => asm_instrs.push(Asm::Op(OpCode::GateEndMutate, vec![])), InstrKind::GateRetain => asm_instrs.push(Asm::Op(OpCode::GateRetain, vec![])), InstrKind::GateRelease => asm_instrs.push(Asm::Op(OpCode::GateRelease, vec![])), } let op_end_idx = asm_instrs.len(); for _ in op_start_idx..op_end_idx { ir_instr_map.push(Some(instr)); } } let end_idx = asm_instrs.len(); ranges.push((start_idx, end_idx)); } Ok(ranges) } fn calculate_pcs(asm_instrs: &[Asm]) -> Vec { let mut pcs = Vec::with_capacity(asm_instrs.len()); let mut current_pc = 0u32; for instr in asm_instrs { pcs.push(current_pc); match instr { Asm::Label(_) => {} Asm::Op(_opcode, operands) => { current_pc += 2; current_pc = update_pc_by_operand(current_pc, operands); } } } pcs } } #[cfg(test)] mod tests { use super::*; use crate::ir_core::const_pool::ConstantValue; use crate::ir_core::ids::FunctionId; use crate::ir_vm::instr::{InstrKind, Instruction}; use crate::ir_vm::module::{Function, Module}; use crate::ir_vm::types::Type; use prometeu_bytecode::{BytecodeLoader, ConstantPoolEntry}; #[test] fn test_emit_module_with_const_pool() { let mut module = Module::new("test".to_string()); let id_int = module.const_pool.insert(ConstantValue::Int(12345)); let id_str = module.const_pool.insert(ConstantValue::String("hello".to_string())); let function = Function { id: FunctionId(0), name: "main".to_string(), params: vec![], return_type: Type::Void, body: vec![ Instruction::new(InstrKind::PushConst(ir_vm::ConstId(id_int.0)), None), Instruction::new(InstrKind::PushConst(ir_vm::ConstId(id_str.0)), None), Instruction::new(InstrKind::Ret, None), ], param_slots: 0, local_slots: 0, return_slots: 0, }; module.functions.push(function); let result = emit_module(&module).expect("Failed to emit module"); let pbc = BytecodeLoader::load(&result.rom).expect("Failed to parse emitted PBC"); assert_eq!(pbc.const_pool.len(), 3); assert_eq!(pbc.const_pool[0], ConstantPoolEntry::Null); assert_eq!(pbc.const_pool[1], ConstantPoolEntry::Int64(12345)); assert_eq!(pbc.const_pool[2], ConstantPoolEntry::String("hello".to_string())); } }