bQUARKz f9120e740b
dev/pbs (#8)
Co-authored-by: Nilton Constantino <nilton.constantino@visma.com>
Reviewed-on: #8
2026-03-24 13:40:22 +00:00

341 lines
15 KiB
Rust

//! # Bytecode Emitter
//!
//! This module is responsible for the final stage of the compilation process:
//! converting the Intermediate Representation (IR) into the binary Prometeu ByteCode (PBC) format.
//!
//! It performs two main tasks:
//! 1. **Instruction Lowering**: Translates `ir_vm::Instruction` into `prometeu_bytecode::asm::Asm` ops.
//! 2. **DebugSymbol Mapping**: Associates bytecode offsets (Program Counter) with source code locations.
use crate::ir_core::ConstantValue;
use crate::ir_vm;
use crate::ir_vm::instr::InstrKind;
use anyhow::{anyhow, Result};
use prometeu_bytecode::abi::SourceSpan;
use prometeu_bytecode::asm::{update_pc_by_operand, Asm, Operand};
use prometeu_bytecode::opcode::OpCode;
use prometeu_bytecode::{BytecodeModule, ConstantPoolEntry, DebugInfo, FunctionMeta};
/// The final output of the code generation phase.
///
/// Produced by [`emit_module`]; wraps the serialized module so callers can write it out or
/// hand it to a loader. The common traits are derived so results can be logged, duplicated
/// and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmitResult {
    /// The serialized binary data of the PBC file.
    pub rom: Vec<u8>,
}
/// The individual pieces of an emitted module, before they are packed into a
/// `BytecodeModule` and serialized.
///
/// Returned by [`emit_fragments`].
pub struct EmitFragments {
    /// Constant pool entries gathered while lowering the module.
    pub const_pool: Vec<ConstantPoolEntry>,
    /// Metadata for each emitted function, in the same order as the IR module's functions.
    pub functions: Vec<FunctionMeta>,
    /// The assembled bytecode for all functions.
    pub code: Vec<u8>,
    /// Optional debug information (PC-to-source-span table and function names).
    pub debug_info: Option<DebugInfo>,
    /// Unresolved labels as reported by the assembler.
    // NOTE(review): presumably maps each unresolved label name to the code offsets that
    // reference it — confirm against `assemble_with_unresolved`.
    pub unresolved_labels: std::collections::HashMap<String, Vec<u32>>,
}
/// Entry point for emitting a bytecode module from the IR.
pub fn emit_module(module: &ir_vm::Module) -> Result<EmitResult> {
let fragments = emit_fragments(module)?;
let exports: Vec<_> = module.functions.iter().enumerate().map(|(i, f)| {
prometeu_bytecode::Export {
symbol: f.name.clone(),
func_idx: i as u32,
}
}).collect();
let bytecode_module = BytecodeModule {
version: 0,
const_pool: fragments.const_pool,
functions: fragments.functions,
code: fragments.code,
debug_info: fragments.debug_info,
exports,
};
Ok(EmitResult {
rom: bytecode_module.serialize(),
})
}
pub fn emit_fragments(module: &ir_vm::Module) -> Result<EmitFragments> {
let mut emitter = BytecodeEmitter::new();
let mut mapped_const_ids = Vec::with_capacity(module.const_pool.constants.len());
for val in &module.const_pool.constants {
mapped_const_ids.push(emitter.add_ir_constant(val));
}
let mut asm_instrs = Vec::new();
let mut ir_instr_map = Vec::new();
let function_ranges = emitter.lower_instrs(module, &mut asm_instrs, &mut ir_instr_map, &mapped_const_ids)?;
let pcs = BytecodeEmitter::calculate_pcs(&asm_instrs);
let assemble_res = prometeu_bytecode::asm::assemble_with_unresolved(&asm_instrs).map_err(|e| anyhow!(e))?;
let bytecode = assemble_res.code;
let mut functions = Vec::new();
let mut function_names = Vec::new();
for (i, function) in module.functions.iter().enumerate() {
let (start_idx, end_idx) = function_ranges[i];
let start_pc = pcs[start_idx];
let end_pc = if end_idx < pcs.len() { pcs[end_idx] } else { bytecode.len() as u32 };
functions.push(FunctionMeta {
code_offset: start_pc,
code_len: end_pc - start_pc,
param_slots: function.param_slots,
local_slots: function.local_slots,
return_slots: function.return_slots,
max_stack_slots: 0, // Will be filled by verifier
});
function_names.push((i as u32, function.name.clone()));
}
let mut pc_to_span = Vec::new();
for (i, instr_opt) in ir_instr_map.iter().enumerate() {
let current_pc = pcs[i];
if let Some(instr) = instr_opt {
if let Some(span) = &instr.span {
pc_to_span.push((current_pc, SourceSpan {
file_id: span.file_id as u32,
start: span.start,
end: span.end,
}));
}
}
}
pc_to_span.sort_by_key(|(pc, _)| *pc);
pc_to_span.dedup_by_key(|(pc, _)| *pc);
Ok(EmitFragments {
const_pool: emitter.constant_pool,
functions,
code: bytecode,
debug_info: Some(DebugInfo {
pc_to_span,
function_names,
}),
unresolved_labels: assemble_res.unresolved_labels,
})
}
/// Internal helper for managing the bytecode emission state.
///
/// The only state it carries is the constant pool being built for the output module.
struct BytecodeEmitter {
    /// Stores constant values (like strings) that are referenced by instructions.
    constant_pool: Vec<ConstantPoolEntry>,
}
impl BytecodeEmitter {
fn new() -> Self {
Self {
// Index 0 is traditionally reserved for Null in many VMs
constant_pool: vec![ConstantPoolEntry::Null],
}
}
/// Adds a value to the constant pool if it doesn't exist, returning its unique index.
fn add_constant(&mut self, entry: ConstantPoolEntry) -> u32 {
if let Some(pos) = self.constant_pool.iter().position(|e| e == &entry) {
pos as u32
} else {
let id = self.constant_pool.len() as u32;
self.constant_pool.push(entry);
id
}
}
fn add_ir_constant(&mut self, val: &ConstantValue) -> u32 {
let entry = match val {
ConstantValue::Int(v) => ConstantPoolEntry::Int64(*v),
ConstantValue::Float(v) => ConstantPoolEntry::Float64(*v),
ConstantValue::String(s) => ConstantPoolEntry::String(s.clone()),
};
self.add_constant(entry)
}
fn lower_instrs<'b>(
&mut self,
module: &'b ir_vm::Module,
asm_instrs: &mut Vec<Asm>,
ir_instr_map: &mut Vec<Option<&'b ir_vm::Instruction>>,
mapped_const_ids: &[u32]
) -> Result<Vec<(usize, usize)>> {
let mut func_names = std::collections::HashMap::new();
for func in &module.functions {
func_names.insert(func.id, func.name.clone());
}
let mut ranges = Vec::new();
for function in &module.functions {
let start_idx = asm_instrs.len();
// Each function starts with a label for its entry point.
asm_instrs.push(Asm::Label(function.name.clone()));
ir_instr_map.push(None);
for instr in &function.body {
let op_start_idx = asm_instrs.len();
// Translate each IR instruction to its equivalent Bytecode OpCode.
match &instr.kind {
InstrKind::Nop => asm_instrs.push(Asm::Op(OpCode::Nop, vec![])),
InstrKind::Halt => asm_instrs.push(Asm::Op(OpCode::Halt, vec![])),
InstrKind::PushConst(id) => {
let mapped_id = mapped_const_ids[id.0 as usize];
asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(mapped_id)]));
}
InstrKind::PushBounded(val) => {
asm_instrs.push(Asm::Op(OpCode::PushBounded, vec![Operand::U32(*val)]));
}
InstrKind::PushBool(v) => {
asm_instrs.push(Asm::Op(OpCode::PushBool, vec![Operand::Bool(*v)]));
}
InstrKind::PushNull => {
asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(0)]));
}
InstrKind::Pop => asm_instrs.push(Asm::Op(OpCode::Pop, vec![])),
InstrKind::Dup => asm_instrs.push(Asm::Op(OpCode::Dup, vec![])),
InstrKind::Swap => asm_instrs.push(Asm::Op(OpCode::Swap, vec![])),
InstrKind::Add => asm_instrs.push(Asm::Op(OpCode::Add, vec![])),
InstrKind::Sub => asm_instrs.push(Asm::Op(OpCode::Sub, vec![])),
InstrKind::Mul => asm_instrs.push(Asm::Op(OpCode::Mul, vec![])),
InstrKind::Div => asm_instrs.push(Asm::Op(OpCode::Div, vec![])),
InstrKind::Neg => asm_instrs.push(Asm::Op(OpCode::Neg, vec![])),
InstrKind::Eq => asm_instrs.push(Asm::Op(OpCode::Eq, vec![])),
InstrKind::Neq => asm_instrs.push(Asm::Op(OpCode::Neq, vec![])),
InstrKind::Lt => asm_instrs.push(Asm::Op(OpCode::Lt, vec![])),
InstrKind::Gt => asm_instrs.push(Asm::Op(OpCode::Gt, vec![])),
InstrKind::Lte => asm_instrs.push(Asm::Op(OpCode::Lte, vec![])),
InstrKind::Gte => asm_instrs.push(Asm::Op(OpCode::Gte, vec![])),
InstrKind::And => asm_instrs.push(Asm::Op(OpCode::And, vec![])),
InstrKind::Or => asm_instrs.push(Asm::Op(OpCode::Or, vec![])),
InstrKind::Not => asm_instrs.push(Asm::Op(OpCode::Not, vec![])),
InstrKind::BitAnd => asm_instrs.push(Asm::Op(OpCode::BitAnd, vec![])),
InstrKind::BitOr => asm_instrs.push(Asm::Op(OpCode::BitOr, vec![])),
InstrKind::BitXor => asm_instrs.push(Asm::Op(OpCode::BitXor, vec![])),
InstrKind::Shl => asm_instrs.push(Asm::Op(OpCode::Shl, vec![])),
InstrKind::Shr => asm_instrs.push(Asm::Op(OpCode::Shr, vec![])),
InstrKind::LocalLoad { slot } => {
asm_instrs.push(Asm::Op(OpCode::GetLocal, vec![Operand::U32(*slot)]));
}
InstrKind::LocalStore { slot } => {
asm_instrs.push(Asm::Op(OpCode::SetLocal, vec![Operand::U32(*slot)]));
}
InstrKind::GetGlobal(slot) => {
asm_instrs.push(Asm::Op(OpCode::GetGlobal, vec![Operand::U32(*slot)]));
}
InstrKind::SetGlobal(slot) => {
asm_instrs.push(Asm::Op(OpCode::SetGlobal, vec![Operand::U32(*slot)]));
}
InstrKind::Jmp(label) => {
asm_instrs.push(Asm::Op(OpCode::Jmp, vec![Operand::RelLabel(label.0.clone(), function.name.clone())]));
}
InstrKind::JmpIfFalse(label) => {
asm_instrs.push(Asm::Op(OpCode::JmpIfFalse, vec![Operand::RelLabel(label.0.clone(), function.name.clone())]));
}
InstrKind::Label(label) => {
asm_instrs.push(Asm::Label(label.0.clone()));
}
InstrKind::Call { func_id, .. } => {
let name = func_names.get(func_id).ok_or_else(|| anyhow!("Undefined function ID: {:?}", func_id))?;
asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(name.clone())]));
}
InstrKind::ImportCall { dep_alias, module_path, symbol_name, .. } => {
let label = format!("@{}::{}:{}", dep_alias, module_path, symbol_name);
asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(label)]));
}
InstrKind::Ret => asm_instrs.push(Asm::Op(OpCode::Ret, vec![])),
InstrKind::Syscall(id) => {
asm_instrs.push(Asm::Op(OpCode::Syscall, vec![Operand::U32(*id)]));
}
InstrKind::FrameSync => asm_instrs.push(Asm::Op(OpCode::FrameSync, vec![])),
InstrKind::Alloc { type_id, slots } => {
asm_instrs.push(Asm::Op(OpCode::Alloc, vec![Operand::U32(type_id.0), Operand::U32(*slots)]));
}
InstrKind::GateLoad { offset } => {
asm_instrs.push(Asm::Op(OpCode::GateLoad, vec![Operand::U32(*offset)]));
}
InstrKind::GateStore { offset } => {
asm_instrs.push(Asm::Op(OpCode::GateStore, vec![Operand::U32(*offset)]));
}
InstrKind::GateBeginPeek => asm_instrs.push(Asm::Op(OpCode::GateBeginPeek, vec![])),
InstrKind::GateEndPeek => asm_instrs.push(Asm::Op(OpCode::GateEndPeek, vec![])),
InstrKind::GateBeginBorrow => asm_instrs.push(Asm::Op(OpCode::GateBeginBorrow, vec![])),
InstrKind::GateEndBorrow => asm_instrs.push(Asm::Op(OpCode::GateEndBorrow, vec![])),
InstrKind::GateBeginMutate => asm_instrs.push(Asm::Op(OpCode::GateBeginMutate, vec![])),
InstrKind::GateEndMutate => asm_instrs.push(Asm::Op(OpCode::GateEndMutate, vec![])),
InstrKind::GateRetain => asm_instrs.push(Asm::Op(OpCode::GateRetain, vec![])),
InstrKind::GateRelease => asm_instrs.push(Asm::Op(OpCode::GateRelease, vec![])),
}
let op_end_idx = asm_instrs.len();
for _ in op_start_idx..op_end_idx {
ir_instr_map.push(Some(instr));
}
}
let end_idx = asm_instrs.len();
ranges.push((start_idx, end_idx));
}
Ok(ranges)
}
fn calculate_pcs(asm_instrs: &[Asm]) -> Vec<u32> {
let mut pcs = Vec::with_capacity(asm_instrs.len());
let mut current_pc = 0u32;
for instr in asm_instrs {
pcs.push(current_pc);
match instr {
Asm::Label(_) => {}
Asm::Op(_opcode, operands) => {
current_pc += 2;
current_pc = update_pc_by_operand(current_pc, operands);
}
}
}
pcs
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir_core::const_pool::ConstantValue;
    use crate::ir_core::ids::FunctionId;
    use crate::ir_vm::instr::{InstrKind, Instruction};
    use crate::ir_vm::module::{Function, Module};
    use crate::ir_vm::types::Type;
    use prometeu_bytecode::{BytecodeLoader, ConstantPoolEntry};

    /// Emits a module with one integer and one string constant and checks that the loaded
    /// PBC's constant pool holds the reserved `Null` entry followed by both constants.
    #[test]
    fn test_emit_module_with_const_pool() {
        let mut module = Module::new("test".to_string());
        let int_const = module.const_pool.insert(ConstantValue::Int(12345));
        let str_const = module
            .const_pool
            .insert(ConstantValue::String("hello".to_string()));

        let body = vec![
            Instruction::new(InstrKind::PushConst(ir_vm::ConstId(int_const.0)), None),
            Instruction::new(InstrKind::PushConst(ir_vm::ConstId(str_const.0)), None),
            Instruction::new(InstrKind::Ret, None),
        ];
        module.functions.push(Function {
            id: FunctionId(0),
            name: "main".to_string(),
            params: vec![],
            return_type: Type::Void,
            body,
            param_slots: 0,
            local_slots: 0,
            return_slots: 0,
        });

        let emitted = emit_module(&module).expect("Failed to emit module");
        let loaded = BytecodeLoader::load(&emitted.rom).expect("Failed to parse emitted PBC");

        let expected = [
            ConstantPoolEntry::Null,
            ConstantPoolEntry::Int64(12345),
            ConstantPoolEntry::String("hello".to_string()),
        ];
        assert_eq!(loaded.const_pool.len(), 3);
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(&loaded.const_pool[index], want);
        }
    }
}