2026-03-24 13:40:45 +00:00

121 lines
4.4 KiB
Rust

//! Deterministic disassembler for Prometeu Bytecode (PBC).
//!
//! Goals:
//! - Stable formatting across platforms (snapshot-friendly).
//! - Complete coverage of the Core ISA, including closures/coroutines.
//! - Roundtrip-safe with the paired `assembler` module.
//!
//! Format (one instruction per line):
//! - `MNEMONIC` for zero-operand instructions.
//! - `MNEMONIC <imm>` for 1-operand instructions (decimal unless stated).
//! - Special operand formats:
//! - `PUSH_F64 f64:0xhhhhhhhhhhhhhhhh` — exact IEEE-754 bit pattern as 16 lowercase hex digits (the `u64` decoded from the little-endian immediate, i.e. the `f64::to_bits()` value).
//! - `MAKE_CLOSURE fn=<u32>, captures=<u32>`
//! - `SPAWN fn=<u32>, argc=<u32>`
//! - `CALL_CLOSURE argc=<u32>`
//! - `HOSTCALL <index>` is printed in decimal because it is a `SYSC` table index.
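//! - `SYSCALL` is printed as `SYSCALL 0xhhhh` (numeric id in lowercase hex, zero-padded to at least 4 digits) to avoid cross-crate deps.
//! - The `Intrinsic` opcode's id is printed in the same `0xhhhh` hex form.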
//!
//! Notes:
//! - All integers are printed in base-10 except where explicitly noted.
//! - Floats use exact bit-pattern format to prevent locale/rounding differences.
//! - Ordering is the canonical decode order; no address prefixes are emitted.
use crate::decode_next;
use crate::isa::core::{CoreOpCode, CoreOpCodeSpecExt};
use crate::DecodeError;
/// Renders a raw 64-bit pattern as `f64:0x` followed by exactly 16 lowercase,
/// zero-padded hexadecimal digits.
fn fmt_f64_bits(bits: u64) -> String {
    // Tag that marks the operand as a float bit pattern rather than an integer.
    const PREFIX: &str = "f64:0x";
    format!("{}{:016x}", PREFIX, bits)
}
/// Formats the immediate bytes `imm` of opcode `op` as operand text.
///
/// Output shapes, by opcode:
/// - jumps, `PopN`, `PushConst`, global/local accessors, `Call`, `Sleep`,
///   `Hostcall`: the immediate as a decimal `u32`;
/// - `PushI32` / `PushI64`: signed decimal;
/// - `PushF64`: the bit pattern via `fmt_f64_bits`;
/// - `PushBool`: the first immediate byte in decimal;
/// - `MakeClosure`: `fn=<u32>, captures=<u32>`;
/// - `Spawn`: `fn=<u32>, argc=<u32>`;
/// - `CallClosure`: `argc=<u32>`;
/// - `Syscall` / `Intrinsic`: `0x` plus lowercase hex, at least 4 digits;
/// - anything else: `0x` followed by the raw bytes as two-digit hex, in
///   encoded order.
///
/// # Panics
///
/// Panics when `imm` does not have the width the opcode's arm reads
/// (for example, anything other than exactly 4 bytes for a single `u32`).
fn format_operand(op: CoreOpCode, imm: &[u8]) -> String {
    // Reads the entire immediate as one little-endian u32; the slice must be
    // exactly 4 bytes long or the conversion panics.
    fn whole_u32(raw: &[u8]) -> u32 {
        u32::from_le_bytes(raw.try_into().unwrap())
    }

    // Reads two little-endian u32 values from bytes 0..4 and 4..8.
    fn u32_pair(raw: &[u8]) -> (u32, u32) {
        let first = u32::from_le_bytes(raw[0..4].try_into().unwrap());
        let second = u32::from_le_bytes(raw[4..8].try_into().unwrap());
        (first, second)
    }

    match op {
        // Plain decimal u32 operands.
        CoreOpCode::Jmp
        | CoreOpCode::JmpIfFalse
        | CoreOpCode::JmpIfTrue
        | CoreOpCode::PopN
        | CoreOpCode::PushConst
        | CoreOpCode::GetGlobal
        | CoreOpCode::SetGlobal
        | CoreOpCode::GetLocal
        | CoreOpCode::SetLocal
        | CoreOpCode::Call
        | CoreOpCode::Sleep
        | CoreOpCode::Hostcall => whole_u32(imm).to_string(),
        CoreOpCode::PushI32 => i32::from_le_bytes(imm.try_into().unwrap()).to_string(),
        CoreOpCode::PushI64 => i64::from_le_bytes(imm.try_into().unwrap()).to_string(),
        CoreOpCode::PushF64 => fmt_f64_bits(u64::from_le_bytes(imm.try_into().unwrap())),
        CoreOpCode::PushBool => imm[0].to_string(),
        CoreOpCode::MakeClosure => {
            let (target, captured) = u32_pair(imm);
            format!("fn={}, captures={}", target, captured)
        }
        CoreOpCode::Spawn => {
            let (target, arg_count) = u32_pair(imm);
            format!("fn={}, argc={}", target, arg_count)
        }
        CoreOpCode::CallClosure => format!("argc={}", whole_u32(imm)),
        // Numeric ids are shown in hex so no external id-to-name table is needed.
        CoreOpCode::Syscall | CoreOpCode::Intrinsic => format!("0x{:04x}", whole_u32(imm)),
        _ => {
            // Fallback: dump the immediate byte-by-byte, in encoded order.
            use core::fmt::Write as _;
            let seed = {
                let mut prefix = String::with_capacity(2 + imm.len() * 2);
                prefix.push_str("0x");
                prefix
            };
            imm.iter().fold(seed, |mut acc, byte| {
                let _ = write!(&mut acc, "{:02x}", byte);
                acc
            })
        }
    }
}
/// Disassembles a contiguous byte slice (single function body) into deterministic text.
///
/// Instructions are decoded one after another with `decode_next`, starting at
/// offset 0 and continuing from each instruction's `next_pc` until the end of
/// `bytes`. Each instruction yields one line: the mnemonic alone when its spec
/// declares no immediate bytes, otherwise the mnemonic, a space, and the text
/// from `format_operand`. Lines are joined with `\n` with no trailing newline;
/// empty input yields an empty string.
///
/// # Errors
///
/// Returns the first `DecodeError` reported by `decode_next`.
pub fn disassemble(bytes: &[u8]) -> Result<String, DecodeError> {
    let mut lines: Vec<String> = Vec::new();
    let mut cursor = 0usize;

    while cursor < bytes.len() {
        let decoded = decode_next(cursor, bytes)?;
        let spec = decoded.opcode.spec();
        let mnemonic = spec.name;

        let line = match spec.imm_bytes as usize {
            // No immediate: the mnemonic stands alone.
            0 => mnemonic.to_string(),
            _ => {
                let operand = format_operand(decoded.opcode, decoded.imm);
                format!("{} {}", mnemonic, operand)
            }
        };
        lines.push(line);

        cursor = decoded.next_pc;
    }

    Ok(lines.join("\n"))
}