//! Minimal deterministic assembler for the canonical disassembly format. //! //! This is intended primarily for roundtrip tests: `bytes -> disassemble -> assemble -> bytes`. //! It supports all mnemonics emitted by `disassembler.rs` and their operand formats. use crate::isa::core::CoreOpCode; #[derive(Debug, Clone, PartialEq, Eq)] pub enum AsmError { EmptyLine, UnknownMnemonic(String), UnexpectedOperand(String), MissingOperand(String), InvalidOperand(String), } fn emit_u16(v: u16, out: &mut Vec) { out.extend_from_slice(&v.to_le_bytes()); } fn emit_u32(v: u32, out: &mut Vec) { out.extend_from_slice(&v.to_le_bytes()); } fn emit_i32(v: i32, out: &mut Vec) { out.extend_from_slice(&v.to_le_bytes()); } fn emit_i64(v: i64, out: &mut Vec) { out.extend_from_slice(&v.to_le_bytes()); } fn emit_f64_bits(bits: u64, out: &mut Vec) { out.extend_from_slice(&bits.to_le_bytes()); } fn parse_u32_any(s: &str) -> Result { let s = s.trim(); if let Some(rest) = s.strip_prefix("0x") { u32::from_str_radix(rest, 16).map_err(|_| AsmError::InvalidOperand(s.into())) } else { s.parse::().map_err(|_| AsmError::InvalidOperand(s.into())) } } fn parse_i32_any(s: &str) -> Result { s.trim().parse::().map_err(|_| AsmError::InvalidOperand(s.into())) } fn parse_i64_any(s: &str) -> Result { s.trim().parse::().map_err(|_| AsmError::InvalidOperand(s.into())) } fn parse_f64_bits(s: &str) -> Result { let s = s.trim(); let s = s.strip_prefix("f64:").ok_or_else(|| AsmError::InvalidOperand(s.into()))?; let hex = s.strip_prefix("0x").ok_or_else(|| AsmError::InvalidOperand(s.into()))?; if hex.len() != 16 { return Err(AsmError::InvalidOperand(s.into())); } u64::from_str_radix(hex, 16).map_err(|_| AsmError::InvalidOperand(s.into())) } fn parse_keyvals(s: &str) -> Result<(&str, &str), AsmError> { // Parses formats like: "fn=123, captures=2" or "fn=3, argc=1" let mut parts = s.split(','); let a = parts.next().ok_or_else(|| AsmError::MissingOperand(s.into()))?.trim(); let b = parts.next().ok_or_else(|| AsmError::MissingOperand(s.into()))?.trim(); if parts.next().is_some() { return Err(AsmError::InvalidOperand(s.into())); } Ok((a, b)) } fn parse_pair<'a>(a: &'a str, ka: &str, b: &'a str, kb: &str) -> Result<(u32, u32), AsmError> { let (ka_l, va_s) = a.split_once('=').ok_or_else(|| AsmError::InvalidOperand(a.into()))?; let (kb_l, vb_s) = b.split_once('=').ok_or_else(|| AsmError::InvalidOperand(b.into()))?; if ka_l.trim() != ka || kb_l.trim() != kb { return Err(AsmError::InvalidOperand(format!("expected keys {} and {}", ka, kb))); } let va = parse_u32_any(va_s)?; let vb = parse_u32_any(vb_s)?; Ok((va, vb)) } fn parse_mnemonic(line: &str) -> (&str, &str) { let line = line.trim(); if let Some(sp) = line.find(char::is_whitespace) { let (mn, rest) = line.split_at(sp); (mn, rest.trim()) } else { (line, "") } } pub fn assemble(src: &str) -> Result, AsmError> { let mut out = Vec::new(); for raw_line in src.lines() { let line = raw_line.trim(); if line.is_empty() { continue; } let (mn, ops) = parse_mnemonic(line); match mn { // Zero-operand "NOP" => { emit_u16(CoreOpCode::Nop as u16, &mut out); } "HALT" => { emit_u16(CoreOpCode::Halt as u16, &mut out); } "TRAP" => { emit_u16(CoreOpCode::Trap as u16, &mut out); } "DUP" => { emit_u16(CoreOpCode::Dup as u16, &mut out); } "SWAP" => { emit_u16(CoreOpCode::Swap as u16, &mut out); } "ADD" => { emit_u16(CoreOpCode::Add as u16, &mut out); } "SUB" => { emit_u16(CoreOpCode::Sub as u16, &mut out); } "MUL" => { emit_u16(CoreOpCode::Mul as u16, &mut out); } "DIV" => { emit_u16(CoreOpCode::Div as u16, &mut out); } "MOD" => { emit_u16(CoreOpCode::Mod as u16, &mut out); } "NEG" => { emit_u16(CoreOpCode::Neg as u16, &mut out); } "EQ" => { emit_u16(CoreOpCode::Eq as u16, &mut out); } "NEQ" => { emit_u16(CoreOpCode::Neq as u16, &mut out); } "LT" => { emit_u16(CoreOpCode::Lt as u16, &mut out); } "LTE" => { emit_u16(CoreOpCode::Lte as u16, &mut out); } "GT" => { emit_u16(CoreOpCode::Gt as u16, &mut out); } "GTE" => { emit_u16(CoreOpCode::Gte as u16, &mut out); } "AND" => { emit_u16(CoreOpCode::And as u16, &mut out); } "OR" => { emit_u16(CoreOpCode::Or as u16, &mut out); } "NOT" => { emit_u16(CoreOpCode::Not as u16, &mut out); } "BIT_AND" => { emit_u16(CoreOpCode::BitAnd as u16, &mut out); } "BIT_OR" => { emit_u16(CoreOpCode::BitOr as u16, &mut out); } "BIT_XOR" => { emit_u16(CoreOpCode::BitXor as u16, &mut out); } "SHL" => { emit_u16(CoreOpCode::Shl as u16, &mut out); } "SHR" => { emit_u16(CoreOpCode::Shr as u16, &mut out); } "RET" => { emit_u16(CoreOpCode::Ret as u16, &mut out); } "YIELD" => { emit_u16(CoreOpCode::Yield as u16, &mut out); } "FRAME_SYNC" => { emit_u16(CoreOpCode::FrameSync as u16, &mut out); } // One u32 immediate (decimal or hex accepted; SYSCALL/HOSTCALL commonly use hex/idx) "JMP" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Jmp as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "JMP_IF_FALSE" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::JmpIfFalse as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "JMP_IF_TRUE" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::JmpIfTrue as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "PUSH_CONST" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::PushConst as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "PUSH_I64" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::PushI64 as u16, &mut out); emit_i64(parse_i64_any(ops)?, &mut out); } "PUSH_F64" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::PushF64 as u16, &mut out); emit_f64_bits(parse_f64_bits(ops)?, &mut out); } "PUSH_BOOL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } let v = parse_u32_any(ops)? as u8; emit_u16(CoreOpCode::PushBool as u16, &mut out); out.push(v); } "PUSH_I32" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::PushI32 as u16, &mut out); emit_i32(parse_i32_any(ops)?, &mut out); } "POP_N" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::PopN as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "GET_GLOBAL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::GetGlobal as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "SET_GLOBAL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::SetGlobal as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "GET_LOCAL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::GetLocal as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "SET_LOCAL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::SetLocal as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "CALL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Call as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "CALL_CLOSURE" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } let (k, v) = ops.split_once('=').ok_or_else(|| AsmError::InvalidOperand(ops.into()))?; if k.trim() != "argc" { return Err(AsmError::InvalidOperand(ops.into())); } emit_u16(CoreOpCode::CallClosure as u16, &mut out); emit_u32(parse_u32_any(v)?, &mut out); } "MAKE_CLOSURE" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } let (a, b) = parse_keyvals(ops)?; // Accept either order but require exact key names let (fn_id, captures) = if a.starts_with("fn=") && b.starts_with("captures=") { parse_pair(a, "fn", b, "captures")? } else if a.starts_with("captures=") && b.starts_with("fn=") { let (cap, fid) = parse_pair(a, "captures", b, "fn")?; (fid, cap) } else { return Err(AsmError::InvalidOperand(ops.into())); }; emit_u16(CoreOpCode::MakeClosure as u16, &mut out); emit_u32(fn_id, &mut out); emit_u32(captures, &mut out); } "SPAWN" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } let (a, b) = parse_keyvals(ops)?; let (fn_id, argc) = if a.starts_with("fn=") && b.starts_with("argc=") { parse_pair(a, "fn", b, "argc")? } else if a.starts_with("argc=") && b.starts_with("fn=") { let (ac, fid) = parse_pair(a, "argc", b, "fn")?; (fid, ac) } else { return Err(AsmError::InvalidOperand(ops.into())); }; emit_u16(CoreOpCode::Spawn as u16, &mut out); emit_u32(fn_id, &mut out); emit_u32(argc, &mut out); } "SLEEP" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Sleep as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "HOSTCALL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Hostcall as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "SYSCALL" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Syscall as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } "INTRINSIC" => { if ops.is_empty() { return Err(AsmError::MissingOperand(line.into())); } emit_u16(CoreOpCode::Intrinsic as u16, &mut out); emit_u32(parse_u32_any(ops)?, &mut out); } other => return Err(AsmError::UnknownMnemonic(other.into())), } } Ok(out) }