Co-authored-by: Nilton Constantino <nilton.constantino@visma.com> Reviewed-on: #8
341 lines
15 KiB
Rust
341 lines
15 KiB
Rust
//! # Bytecode Emitter
//!
//! This module is responsible for the final stage of the compilation process:
//! converting the Intermediate Representation (IR) into the binary Prometeu ByteCode (PBC) format.
//!
//! It performs two main tasks:
//! 1. **Instruction Lowering**: Translates `ir_vm::Instruction` into `prometeu_bytecode::asm::Asm` ops.
//! 2. **DebugSymbol Mapping**: Associates bytecode offsets (Program Counter) with source code locations.
|
|
|
|
use crate::ir_core::ConstantValue;
|
|
use crate::ir_vm;
|
|
use crate::ir_vm::instr::InstrKind;
|
|
use anyhow::{anyhow, Result};
|
|
use prometeu_bytecode::abi::SourceSpan;
|
|
use prometeu_bytecode::asm::{update_pc_by_operand, Asm, Operand};
|
|
use prometeu_bytecode::opcode::OpCode;
|
|
use prometeu_bytecode::{BytecodeModule, ConstantPoolEntry, DebugInfo, FunctionMeta};
|
|
|
|
/// The final output of the code generation phase.
///
/// Derives the common value traits so callers can log, copy, and compare
/// emitted ROMs (for example in tests that check emission is deterministic).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmitResult {
    /// The serialized binary data of the PBC file.
    pub rom: Vec<u8>,
}
|
|
|
|
pub struct EmitFragments {
|
|
pub const_pool: Vec<ConstantPoolEntry>,
|
|
pub functions: Vec<FunctionMeta>,
|
|
pub code: Vec<u8>,
|
|
pub debug_info: Option<DebugInfo>,
|
|
pub unresolved_labels: std::collections::HashMap<String, Vec<u32>>,
|
|
}
|
|
|
|
/// Entry point for emitting a bytecode module from the IR.
|
|
pub fn emit_module(module: &ir_vm::Module) -> Result<EmitResult> {
|
|
let fragments = emit_fragments(module)?;
|
|
|
|
let exports: Vec<_> = module.functions.iter().enumerate().map(|(i, f)| {
|
|
prometeu_bytecode::Export {
|
|
symbol: f.name.clone(),
|
|
func_idx: i as u32,
|
|
}
|
|
}).collect();
|
|
|
|
let bytecode_module = BytecodeModule {
|
|
version: 0,
|
|
const_pool: fragments.const_pool,
|
|
functions: fragments.functions,
|
|
code: fragments.code,
|
|
debug_info: fragments.debug_info,
|
|
exports,
|
|
};
|
|
|
|
Ok(EmitResult {
|
|
rom: bytecode_module.serialize(),
|
|
})
|
|
}
|
|
|
|
pub fn emit_fragments(module: &ir_vm::Module) -> Result<EmitFragments> {
|
|
let mut emitter = BytecodeEmitter::new();
|
|
|
|
let mut mapped_const_ids = Vec::with_capacity(module.const_pool.constants.len());
|
|
for val in &module.const_pool.constants {
|
|
mapped_const_ids.push(emitter.add_ir_constant(val));
|
|
}
|
|
|
|
let mut asm_instrs = Vec::new();
|
|
let mut ir_instr_map = Vec::new();
|
|
let function_ranges = emitter.lower_instrs(module, &mut asm_instrs, &mut ir_instr_map, &mapped_const_ids)?;
|
|
|
|
let pcs = BytecodeEmitter::calculate_pcs(&asm_instrs);
|
|
let assemble_res = prometeu_bytecode::asm::assemble_with_unresolved(&asm_instrs).map_err(|e| anyhow!(e))?;
|
|
let bytecode = assemble_res.code;
|
|
|
|
let mut functions = Vec::new();
|
|
let mut function_names = Vec::new();
|
|
for (i, function) in module.functions.iter().enumerate() {
|
|
let (start_idx, end_idx) = function_ranges[i];
|
|
let start_pc = pcs[start_idx];
|
|
let end_pc = if end_idx < pcs.len() { pcs[end_idx] } else { bytecode.len() as u32 };
|
|
|
|
functions.push(FunctionMeta {
|
|
code_offset: start_pc,
|
|
code_len: end_pc - start_pc,
|
|
param_slots: function.param_slots,
|
|
local_slots: function.local_slots,
|
|
return_slots: function.return_slots,
|
|
max_stack_slots: 0, // Will be filled by verifier
|
|
});
|
|
function_names.push((i as u32, function.name.clone()));
|
|
}
|
|
|
|
let mut pc_to_span = Vec::new();
|
|
for (i, instr_opt) in ir_instr_map.iter().enumerate() {
|
|
let current_pc = pcs[i];
|
|
if let Some(instr) = instr_opt {
|
|
if let Some(span) = &instr.span {
|
|
pc_to_span.push((current_pc, SourceSpan {
|
|
file_id: span.file_id as u32,
|
|
start: span.start,
|
|
end: span.end,
|
|
}));
|
|
}
|
|
}
|
|
}
|
|
pc_to_span.sort_by_key(|(pc, _)| *pc);
|
|
pc_to_span.dedup_by_key(|(pc, _)| *pc);
|
|
|
|
Ok(EmitFragments {
|
|
const_pool: emitter.constant_pool,
|
|
functions,
|
|
code: bytecode,
|
|
debug_info: Some(DebugInfo {
|
|
pc_to_span,
|
|
function_names,
|
|
}),
|
|
unresolved_labels: assemble_res.unresolved_labels,
|
|
})
|
|
}
|
|
|
|
/// Internal helper for managing the bytecode emission state.
|
|
struct BytecodeEmitter {
|
|
/// Stores constant values (like strings) that are referenced by instructions.
|
|
constant_pool: Vec<ConstantPoolEntry>,
|
|
}
|
|
|
|
impl BytecodeEmitter {
|
|
fn new() -> Self {
|
|
Self {
|
|
// Index 0 is traditionally reserved for Null in many VMs
|
|
constant_pool: vec![ConstantPoolEntry::Null],
|
|
}
|
|
}
|
|
|
|
/// Adds a value to the constant pool if it doesn't exist, returning its unique index.
|
|
fn add_constant(&mut self, entry: ConstantPoolEntry) -> u32 {
|
|
if let Some(pos) = self.constant_pool.iter().position(|e| e == &entry) {
|
|
pos as u32
|
|
} else {
|
|
let id = self.constant_pool.len() as u32;
|
|
self.constant_pool.push(entry);
|
|
id
|
|
}
|
|
}
|
|
|
|
fn add_ir_constant(&mut self, val: &ConstantValue) -> u32 {
|
|
let entry = match val {
|
|
ConstantValue::Int(v) => ConstantPoolEntry::Int64(*v),
|
|
ConstantValue::Float(v) => ConstantPoolEntry::Float64(*v),
|
|
ConstantValue::String(s) => ConstantPoolEntry::String(s.clone()),
|
|
};
|
|
self.add_constant(entry)
|
|
}
|
|
|
|
fn lower_instrs<'b>(
|
|
&mut self,
|
|
module: &'b ir_vm::Module,
|
|
asm_instrs: &mut Vec<Asm>,
|
|
ir_instr_map: &mut Vec<Option<&'b ir_vm::Instruction>>,
|
|
mapped_const_ids: &[u32]
|
|
) -> Result<Vec<(usize, usize)>> {
|
|
let mut func_names = std::collections::HashMap::new();
|
|
for func in &module.functions {
|
|
func_names.insert(func.id, func.name.clone());
|
|
}
|
|
|
|
let mut ranges = Vec::new();
|
|
|
|
for function in &module.functions {
|
|
let start_idx = asm_instrs.len();
|
|
// Each function starts with a label for its entry point.
|
|
asm_instrs.push(Asm::Label(function.name.clone()));
|
|
ir_instr_map.push(None);
|
|
|
|
for instr in &function.body {
|
|
let op_start_idx = asm_instrs.len();
|
|
|
|
// Translate each IR instruction to its equivalent Bytecode OpCode.
|
|
match &instr.kind {
|
|
InstrKind::Nop => asm_instrs.push(Asm::Op(OpCode::Nop, vec![])),
|
|
InstrKind::Halt => asm_instrs.push(Asm::Op(OpCode::Halt, vec![])),
|
|
InstrKind::PushConst(id) => {
|
|
let mapped_id = mapped_const_ids[id.0 as usize];
|
|
asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(mapped_id)]));
|
|
}
|
|
InstrKind::PushBounded(val) => {
|
|
asm_instrs.push(Asm::Op(OpCode::PushBounded, vec![Operand::U32(*val)]));
|
|
}
|
|
InstrKind::PushBool(v) => {
|
|
asm_instrs.push(Asm::Op(OpCode::PushBool, vec![Operand::Bool(*v)]));
|
|
}
|
|
InstrKind::PushNull => {
|
|
asm_instrs.push(Asm::Op(OpCode::PushConst, vec![Operand::U32(0)]));
|
|
}
|
|
InstrKind::Pop => asm_instrs.push(Asm::Op(OpCode::Pop, vec![])),
|
|
InstrKind::Dup => asm_instrs.push(Asm::Op(OpCode::Dup, vec![])),
|
|
InstrKind::Swap => asm_instrs.push(Asm::Op(OpCode::Swap, vec![])),
|
|
InstrKind::Add => asm_instrs.push(Asm::Op(OpCode::Add, vec![])),
|
|
InstrKind::Sub => asm_instrs.push(Asm::Op(OpCode::Sub, vec![])),
|
|
InstrKind::Mul => asm_instrs.push(Asm::Op(OpCode::Mul, vec![])),
|
|
InstrKind::Div => asm_instrs.push(Asm::Op(OpCode::Div, vec![])),
|
|
InstrKind::Neg => asm_instrs.push(Asm::Op(OpCode::Neg, vec![])),
|
|
InstrKind::Eq => asm_instrs.push(Asm::Op(OpCode::Eq, vec![])),
|
|
InstrKind::Neq => asm_instrs.push(Asm::Op(OpCode::Neq, vec![])),
|
|
InstrKind::Lt => asm_instrs.push(Asm::Op(OpCode::Lt, vec![])),
|
|
InstrKind::Gt => asm_instrs.push(Asm::Op(OpCode::Gt, vec![])),
|
|
InstrKind::Lte => asm_instrs.push(Asm::Op(OpCode::Lte, vec![])),
|
|
InstrKind::Gte => asm_instrs.push(Asm::Op(OpCode::Gte, vec![])),
|
|
InstrKind::And => asm_instrs.push(Asm::Op(OpCode::And, vec![])),
|
|
InstrKind::Or => asm_instrs.push(Asm::Op(OpCode::Or, vec![])),
|
|
InstrKind::Not => asm_instrs.push(Asm::Op(OpCode::Not, vec![])),
|
|
InstrKind::BitAnd => asm_instrs.push(Asm::Op(OpCode::BitAnd, vec![])),
|
|
InstrKind::BitOr => asm_instrs.push(Asm::Op(OpCode::BitOr, vec![])),
|
|
InstrKind::BitXor => asm_instrs.push(Asm::Op(OpCode::BitXor, vec![])),
|
|
InstrKind::Shl => asm_instrs.push(Asm::Op(OpCode::Shl, vec![])),
|
|
InstrKind::Shr => asm_instrs.push(Asm::Op(OpCode::Shr, vec![])),
|
|
InstrKind::LocalLoad { slot } => {
|
|
asm_instrs.push(Asm::Op(OpCode::GetLocal, vec![Operand::U32(*slot)]));
|
|
}
|
|
InstrKind::LocalStore { slot } => {
|
|
asm_instrs.push(Asm::Op(OpCode::SetLocal, vec![Operand::U32(*slot)]));
|
|
}
|
|
InstrKind::GetGlobal(slot) => {
|
|
asm_instrs.push(Asm::Op(OpCode::GetGlobal, vec![Operand::U32(*slot)]));
|
|
}
|
|
InstrKind::SetGlobal(slot) => {
|
|
asm_instrs.push(Asm::Op(OpCode::SetGlobal, vec![Operand::U32(*slot)]));
|
|
}
|
|
InstrKind::Jmp(label) => {
|
|
asm_instrs.push(Asm::Op(OpCode::Jmp, vec![Operand::RelLabel(label.0.clone(), function.name.clone())]));
|
|
}
|
|
InstrKind::JmpIfFalse(label) => {
|
|
asm_instrs.push(Asm::Op(OpCode::JmpIfFalse, vec![Operand::RelLabel(label.0.clone(), function.name.clone())]));
|
|
}
|
|
InstrKind::Label(label) => {
|
|
asm_instrs.push(Asm::Label(label.0.clone()));
|
|
}
|
|
InstrKind::Call { func_id, .. } => {
|
|
let name = func_names.get(func_id).ok_or_else(|| anyhow!("Undefined function ID: {:?}", func_id))?;
|
|
asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(name.clone())]));
|
|
}
|
|
InstrKind::ImportCall { dep_alias, module_path, symbol_name, .. } => {
|
|
let label = format!("@{}::{}:{}", dep_alias, module_path, symbol_name);
|
|
asm_instrs.push(Asm::Op(OpCode::Call, vec![Operand::Label(label)]));
|
|
}
|
|
InstrKind::Ret => asm_instrs.push(Asm::Op(OpCode::Ret, vec![])),
|
|
InstrKind::Syscall(id) => {
|
|
asm_instrs.push(Asm::Op(OpCode::Syscall, vec![Operand::U32(*id)]));
|
|
}
|
|
InstrKind::FrameSync => asm_instrs.push(Asm::Op(OpCode::FrameSync, vec![])),
|
|
InstrKind::Alloc { type_id, slots } => {
|
|
asm_instrs.push(Asm::Op(OpCode::Alloc, vec![Operand::U32(type_id.0), Operand::U32(*slots)]));
|
|
}
|
|
InstrKind::GateLoad { offset } => {
|
|
asm_instrs.push(Asm::Op(OpCode::GateLoad, vec![Operand::U32(*offset)]));
|
|
}
|
|
InstrKind::GateStore { offset } => {
|
|
asm_instrs.push(Asm::Op(OpCode::GateStore, vec![Operand::U32(*offset)]));
|
|
}
|
|
InstrKind::GateBeginPeek => asm_instrs.push(Asm::Op(OpCode::GateBeginPeek, vec![])),
|
|
InstrKind::GateEndPeek => asm_instrs.push(Asm::Op(OpCode::GateEndPeek, vec![])),
|
|
InstrKind::GateBeginBorrow => asm_instrs.push(Asm::Op(OpCode::GateBeginBorrow, vec![])),
|
|
InstrKind::GateEndBorrow => asm_instrs.push(Asm::Op(OpCode::GateEndBorrow, vec![])),
|
|
InstrKind::GateBeginMutate => asm_instrs.push(Asm::Op(OpCode::GateBeginMutate, vec![])),
|
|
InstrKind::GateEndMutate => asm_instrs.push(Asm::Op(OpCode::GateEndMutate, vec![])),
|
|
InstrKind::GateRetain => asm_instrs.push(Asm::Op(OpCode::GateRetain, vec![])),
|
|
InstrKind::GateRelease => asm_instrs.push(Asm::Op(OpCode::GateRelease, vec![])),
|
|
}
|
|
|
|
let op_end_idx = asm_instrs.len();
|
|
for _ in op_start_idx..op_end_idx {
|
|
ir_instr_map.push(Some(instr));
|
|
}
|
|
}
|
|
let end_idx = asm_instrs.len();
|
|
ranges.push((start_idx, end_idx));
|
|
}
|
|
Ok(ranges)
|
|
}
|
|
|
|
fn calculate_pcs(asm_instrs: &[Asm]) -> Vec<u32> {
|
|
let mut pcs = Vec::with_capacity(asm_instrs.len());
|
|
let mut current_pc = 0u32;
|
|
for instr in asm_instrs {
|
|
pcs.push(current_pc);
|
|
match instr {
|
|
Asm::Label(_) => {}
|
|
Asm::Op(_opcode, operands) => {
|
|
current_pc += 2;
|
|
current_pc = update_pc_by_operand(current_pc, operands);
|
|
}
|
|
}
|
|
}
|
|
pcs
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir_core::const_pool::ConstantValue;
    use crate::ir_core::ids::FunctionId;
    use crate::ir_vm::instr::{InstrKind, Instruction};
    use crate::ir_vm::module::{Function, Module};
    use crate::ir_vm::types::Type;
    use prometeu_bytecode::{BytecodeLoader, ConstantPoolEntry};

    /// Emits a module with one integer and one string constant and checks that
    /// the loaded PBC constant pool is `[Null, Int64, String]`.
    #[test]
    fn test_emit_module_with_const_pool() {
        let mut module = Module::new("test".to_string());

        let id_int = module.const_pool.insert(ConstantValue::Int(12345));
        let id_str = module.const_pool.insert(ConstantValue::String("hello".to_string()));

        // main: push both constants, then return.
        let body = vec![
            Instruction::new(InstrKind::PushConst(ir_vm::ConstId(id_int.0)), None),
            Instruction::new(InstrKind::PushConst(ir_vm::ConstId(id_str.0)), None),
            Instruction::new(InstrKind::Ret, None),
        ];
        module.functions.push(Function {
            id: FunctionId(0),
            name: "main".to_string(),
            params: vec![],
            return_type: Type::Void,
            body,
            param_slots: 0,
            local_slots: 0,
            return_slots: 0,
        });

        let emitted = emit_module(&module).expect("Failed to emit module");
        let pbc = BytecodeLoader::load(&emitted.rom).expect("Failed to parse emitted PBC");

        // Slot 0 is the reserved Null entry; the IR constants follow in insertion order.
        assert_eq!(pbc.const_pool.len(), 3);
        assert_eq!(pbc.const_pool[0], ConstantPoolEntry::Null);
        assert_eq!(pbc.const_pool[1], ConstantPoolEntry::Int64(12345));
        assert_eq!(pbc.const_pool[2], ConstantPoolEntry::String("hello".to_string()));
    }
}
|