2026-03-24 13:40:45 +00:00

368 lines
14 KiB
Rust

//! Minimal deterministic assembler for the canonical disassembly format.
//!
//! This is intended primarily for roundtrip tests: `bytes -> disassemble -> assemble -> bytes`.
//! It supports all mnemonics emitted by `disassembler.rs` and their operand formats.
use crate::isa::core::CoreOpCode;
/// Errors reported while assembling canonical disassembly text.
///
/// Every payload-carrying variant holds the offending text (a mnemonic, an
/// operand, or the whole source line) so callers can surface it in diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AsmError {
    /// A line contained no instruction.
    EmptyLine,
    /// The mnemonic is not part of the supported instruction set.
    UnknownMnemonic(String),
    /// An operand was supplied where none is accepted.
    UnexpectedOperand(String),
    /// A required operand is absent.
    MissingOperand(String),
    /// An operand is present but malformed or out of range.
    InvalidOperand(String),
}

impl std::fmt::Display for AsmError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            AsmError::EmptyLine => f.write_str("empty line"),
            AsmError::UnknownMnemonic(text) => write!(f, "unknown mnemonic: {}", text),
            AsmError::UnexpectedOperand(text) => write!(f, "unexpected operand: {}", text),
            AsmError::MissingOperand(text) => write!(f, "missing operand: {}", text),
            AsmError::InvalidOperand(text) => write!(f, "invalid operand: {}", text),
        }
    }
}

// Lets `AsmError` flow through `?` into `Box<dyn std::error::Error>` and similar sinks.
impl std::error::Error for AsmError {}
/// Appends `v` to `out` as two little-endian bytes.
fn emit_u16(v: u16, out: &mut Vec<u8>) {
    let bytes = v.to_le_bytes();
    out.extend(bytes.iter().copied());
}
/// Appends `v` to `out` as four little-endian bytes.
fn emit_u32(v: u32, out: &mut Vec<u8>) {
    let bytes = v.to_le_bytes();
    out.extend(bytes.iter().copied());
}
/// Appends `v` to `out` as four little-endian bytes (two's complement).
fn emit_i32(v: i32, out: &mut Vec<u8>) {
    let bytes = v.to_le_bytes();
    out.extend(bytes.iter().copied());
}
/// Appends `v` to `out` as eight little-endian bytes (two's complement).
fn emit_i64(v: i64, out: &mut Vec<u8>) {
    let bytes = v.to_le_bytes();
    out.extend(bytes.iter().copied());
}
/// Appends the raw 64-bit pattern `bits` to `out` as eight little-endian bytes.
fn emit_f64_bits(bits: u64, out: &mut Vec<u8>) {
    let bytes = bits.to_le_bytes();
    out.extend(bytes.iter().copied());
}
/// Parses an unsigned 32-bit operand written in decimal or `0x`-prefixed hexadecimal.
///
/// Surrounding whitespace is ignored.
///
/// # Errors
/// Returns [`AsmError::InvalidOperand`] carrying the trimmed operand when the text is
/// not a valid number, does not fit in `u32`, or has anything other than hex digits
/// after the `0x` prefix.
fn parse_u32_any(s: &str) -> Result<u32, AsmError> {
    let s = s.trim();
    let invalid = || AsmError::InvalidOperand(s.into());
    match s.strip_prefix("0x") {
        Some(hex) => {
            // `from_str_radix` tolerates a leading `+`, which would let malformed
            // input such as `0x+ff` through; insist on pure hex digits first.
            if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
                return Err(invalid());
            }
            u32::from_str_radix(hex, 16).map_err(|_| invalid())
        }
        None => s.parse::<u32>().map_err(|_| invalid()),
    }
}
/// Parses a signed 32-bit decimal operand, ignoring surrounding whitespace.
///
/// # Errors
/// Returns [`AsmError::InvalidOperand`] carrying the operand exactly as passed in
/// (untrimmed) when it is not a valid `i32`.
fn parse_i32_any(s: &str) -> Result<i32, AsmError> {
    match s.trim().parse::<i32>() {
        Ok(value) => Ok(value),
        Err(_) => Err(AsmError::InvalidOperand(s.into())),
    }
}
/// Parses a signed 64-bit decimal operand, ignoring surrounding whitespace.
///
/// # Errors
/// Returns [`AsmError::InvalidOperand`] carrying the operand exactly as passed in
/// (untrimmed) when it is not a valid `i64`.
fn parse_i64_any(s: &str) -> Result<i64, AsmError> {
    match s.trim().parse::<i64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(AsmError::InvalidOperand(s.into())),
    }
}
/// Parses an `f64` operand given as its raw bit pattern: `f64:0x` followed by
/// exactly 16 hexadecimal digits.
///
/// Surrounding whitespace is ignored. The returned value is the 64-bit pattern
/// itself, not a decoded float.
///
/// # Errors
/// Returns [`AsmError::InvalidOperand`] carrying the full trimmed operand when a
/// prefix is missing, the digit count is not 16, or any character is not a hex digit.
fn parse_f64_bits(s: &str) -> Result<u64, AsmError> {
    let s = s.trim();
    // Always report the whole operand, so the error payload does not depend on
    // how far parsing got before failing.
    let invalid = || AsmError::InvalidOperand(s.into());
    let hex = s
        .strip_prefix("f64:")
        .and_then(|rest| rest.strip_prefix("0x"))
        .ok_or_else(invalid)?;
    // `from_str_radix` accepts a leading `+`, so a length check alone would admit
    // 15 digits plus a sign; require every character to be a hex digit.
    if hex.len() != 16 || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err(invalid());
    }
    u64::from_str_radix(hex, 16).map_err(|_| invalid())
}
/// Splits a two-item, comma-separated operand list into its trimmed items.
///
/// Accepts text such as `"fn=123, captures=2"` or `"fn=3, argc=1"`. The items are
/// returned as-is, still in `key=value` form.
///
/// # Errors
/// - [`AsmError::MissingOperand`] when there is no comma, i.e. only one item.
/// - [`AsmError::InvalidOperand`] when more than two items are present.
fn parse_keyvals(s: &str) -> Result<(&str, &str), AsmError> {
    let (first, rest) = match s.split_once(',') {
        Some(halves) => halves,
        None => return Err(AsmError::MissingOperand(s.into())),
    };
    // A further comma means a third item, which this format does not allow.
    if rest.contains(',') {
        return Err(AsmError::InvalidOperand(s.into()));
    }
    Ok((first.trim(), rest.trim()))
}
/// Parses two `key=value` items whose keys must be `ka` and `kb`, in that order,
/// and returns both values as `u32`.
///
/// Keys are compared after trimming; values go through `parse_u32_any`, so they
/// may be decimal or `0x`-prefixed hex.
///
/// # Errors
/// Returns [`AsmError::InvalidOperand`] when an item has no `=`, when a key does
/// not match the expected name, or when a value is not a valid `u32`.
fn parse_pair<'a>(a: &'a str, ka: &str, b: &'a str, kb: &str) -> Result<(u32, u32), AsmError> {
    /// Splits one `key=value` item at its first `=`.
    fn split_item(item: &str) -> Result<(&str, &str), AsmError> {
        match item.split_once('=') {
            Some(kv) => Ok(kv),
            None => Err(AsmError::InvalidOperand(item.into())),
        }
    }

    let (first_key, first_val) = split_item(a)?;
    let (second_key, second_val) = split_item(b)?;

    let keys_match = first_key.trim() == ka && second_key.trim() == kb;
    if !keys_match {
        return Err(AsmError::InvalidOperand(format!("expected keys {} and {}", ka, kb)));
    }

    let first = parse_u32_any(first_val)?;
    let second = parse_u32_any(second_val)?;
    Ok((first, second))
}
/// Splits an instruction line into `(mnemonic, operands)`.
///
/// The line is trimmed first. The mnemonic is everything before the first
/// whitespace character; the operand text is the trimmed remainder, or `""` when
/// the line holds a single token.
fn parse_mnemonic(line: &str) -> (&str, &str) {
    let trimmed = line.trim();
    match trimmed.split_once(char::is_whitespace) {
        Some((mnemonic, operands)) => (mnemonic, operands.trim()),
        None => (trimmed, ""),
    }
}
pub fn assemble(src: &str) -> Result<Vec<u8>, AsmError> {
let mut out = Vec::new();
for raw_line in src.lines() {
let line = raw_line.trim();
if line.is_empty() {
continue;
}
let (mn, ops) = parse_mnemonic(line);
match mn {
// Zero-operand
"NOP" => {
emit_u16(CoreOpCode::Nop as u16, &mut out);
}
"HALT" => {
emit_u16(CoreOpCode::Halt as u16, &mut out);
}
"TRAP" => {
emit_u16(CoreOpCode::Trap as u16, &mut out);
}
"DUP" => {
emit_u16(CoreOpCode::Dup as u16, &mut out);
}
"SWAP" => {
emit_u16(CoreOpCode::Swap as u16, &mut out);
}
"ADD" => {
emit_u16(CoreOpCode::Add as u16, &mut out);
}
"SUB" => {
emit_u16(CoreOpCode::Sub as u16, &mut out);
}
"MUL" => {
emit_u16(CoreOpCode::Mul as u16, &mut out);
}
"DIV" => {
emit_u16(CoreOpCode::Div as u16, &mut out);
}
"MOD" => {
emit_u16(CoreOpCode::Mod as u16, &mut out);
}
"NEG" => {
emit_u16(CoreOpCode::Neg as u16, &mut out);
}
"EQ" => {
emit_u16(CoreOpCode::Eq as u16, &mut out);
}
"NEQ" => {
emit_u16(CoreOpCode::Neq as u16, &mut out);
}
"LT" => {
emit_u16(CoreOpCode::Lt as u16, &mut out);
}
"LTE" => {
emit_u16(CoreOpCode::Lte as u16, &mut out);
}
"GT" => {
emit_u16(CoreOpCode::Gt as u16, &mut out);
}
"GTE" => {
emit_u16(CoreOpCode::Gte as u16, &mut out);
}
"AND" => {
emit_u16(CoreOpCode::And as u16, &mut out);
}
"OR" => {
emit_u16(CoreOpCode::Or as u16, &mut out);
}
"NOT" => {
emit_u16(CoreOpCode::Not as u16, &mut out);
}
"BIT_AND" => {
emit_u16(CoreOpCode::BitAnd as u16, &mut out);
}
"BIT_OR" => {
emit_u16(CoreOpCode::BitOr as u16, &mut out);
}
"BIT_XOR" => {
emit_u16(CoreOpCode::BitXor as u16, &mut out);
}
"SHL" => {
emit_u16(CoreOpCode::Shl as u16, &mut out);
}
"SHR" => {
emit_u16(CoreOpCode::Shr as u16, &mut out);
}
"RET" => {
emit_u16(CoreOpCode::Ret as u16, &mut out);
}
"YIELD" => {
emit_u16(CoreOpCode::Yield as u16, &mut out);
}
"FRAME_SYNC" => {
emit_u16(CoreOpCode::FrameSync as u16, &mut out);
}
// One u32 immediate (decimal or hex accepted; SYSCALL/HOSTCALL commonly use hex/idx)
"JMP" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Jmp as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"JMP_IF_FALSE" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::JmpIfFalse as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"JMP_IF_TRUE" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::JmpIfTrue as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"PUSH_CONST" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::PushConst as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"PUSH_I64" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::PushI64 as u16, &mut out);
emit_i64(parse_i64_any(ops)?, &mut out);
}
"PUSH_F64" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::PushF64 as u16, &mut out);
emit_f64_bits(parse_f64_bits(ops)?, &mut out);
}
"PUSH_BOOL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
let v = parse_u32_any(ops)? as u8;
emit_u16(CoreOpCode::PushBool as u16, &mut out);
out.push(v);
}
"PUSH_I32" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::PushI32 as u16, &mut out);
emit_i32(parse_i32_any(ops)?, &mut out);
}
"POP_N" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::PopN as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"GET_GLOBAL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::GetGlobal as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"SET_GLOBAL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::SetGlobal as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"GET_LOCAL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::GetLocal as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"SET_LOCAL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::SetLocal as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"CALL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Call as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"CALL_CLOSURE" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
let (k, v) =
ops.split_once('=').ok_or_else(|| AsmError::InvalidOperand(ops.into()))?;
if k.trim() != "argc" {
return Err(AsmError::InvalidOperand(ops.into()));
}
emit_u16(CoreOpCode::CallClosure as u16, &mut out);
emit_u32(parse_u32_any(v)?, &mut out);
}
"MAKE_CLOSURE" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
let (a, b) = parse_keyvals(ops)?;
// Accept either order but require exact key names
let (fn_id, captures) = if a.starts_with("fn=") && b.starts_with("captures=") {
parse_pair(a, "fn", b, "captures")?
} else if a.starts_with("captures=") && b.starts_with("fn=") {
let (cap, fid) = parse_pair(a, "captures", b, "fn")?;
(fid, cap)
} else {
return Err(AsmError::InvalidOperand(ops.into()));
};
emit_u16(CoreOpCode::MakeClosure as u16, &mut out);
emit_u32(fn_id, &mut out);
emit_u32(captures, &mut out);
}
"SPAWN" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
let (a, b) = parse_keyvals(ops)?;
let (fn_id, argc) = if a.starts_with("fn=") && b.starts_with("argc=") {
parse_pair(a, "fn", b, "argc")?
} else if a.starts_with("argc=") && b.starts_with("fn=") {
let (ac, fid) = parse_pair(a, "argc", b, "fn")?;
(fid, ac)
} else {
return Err(AsmError::InvalidOperand(ops.into()));
};
emit_u16(CoreOpCode::Spawn as u16, &mut out);
emit_u32(fn_id, &mut out);
emit_u32(argc, &mut out);
}
"SLEEP" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Sleep as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"HOSTCALL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Hostcall as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"SYSCALL" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Syscall as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
"INTRINSIC" => {
if ops.is_empty() {
return Err(AsmError::MissingOperand(line.into()));
}
emit_u16(CoreOpCode::Intrinsic as u16, &mut out);
emit_u32(parse_u32_any(ops)?, &mut out);
}
other => return Err(AsmError::UnknownMnemonic(other.into())),
}
}
Ok(out)
}