//! Minimal deterministic assembler for the canonical disassembly format.
//!
//! This is intended primarily for roundtrip tests: `bytes -> disassemble -> assemble -> bytes`.
//! It supports all mnemonics emitted by `disassembler.rs` and their operand formats.

use crate::isa::core::CoreOpCode;
|
|
|
|
/// Errors reported while assembling canonical disassembly text.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so values can be
/// printed directly and propagated through `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AsmError {
    /// A line contained no instruction.
    EmptyLine,
    /// The first token of a line is not a known mnemonic; carries that token.
    UnknownMnemonic(String),
    /// An operand was present where none is expected; carries the offending text.
    UnexpectedOperand(String),
    /// A required operand was absent; carries the offending text.
    MissingOperand(String),
    /// An operand was malformed; carries the offending text or a short description.
    InvalidOperand(String),
}

impl std::fmt::Display for AsmError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            AsmError::EmptyLine => f.write_str("empty line"),
            AsmError::UnknownMnemonic(text) => write!(f, "unknown mnemonic: {}", text),
            AsmError::UnexpectedOperand(text) => write!(f, "unexpected operand: {}", text),
            AsmError::MissingOperand(text) => write!(f, "missing operand: {}", text),
            AsmError::InvalidOperand(text) => write!(f, "invalid operand: {}", text),
        }
    }
}

impl std::error::Error for AsmError {}
|
|
|
|
/// Appends `v` to `out` as two bytes in little-endian order.
fn emit_u16(v: u16, out: &mut Vec<u8>) {
    let encoded = v.to_le_bytes();
    out.extend(encoded.iter().copied());
}
|
|
/// Appends `v` to `out` as four bytes in little-endian order.
fn emit_u32(v: u32, out: &mut Vec<u8>) {
    let encoded = v.to_le_bytes();
    out.extend(encoded.iter().copied());
}
|
|
/// Appends `v` to `out` as four bytes in little-endian order.
fn emit_i32(v: i32, out: &mut Vec<u8>) {
    let encoded = v.to_le_bytes();
    out.extend(encoded.iter().copied());
}
|
|
/// Appends `v` to `out` as eight bytes in little-endian order.
fn emit_i64(v: i64, out: &mut Vec<u8>) {
    let encoded = v.to_le_bytes();
    out.extend(encoded.iter().copied());
}
|
|
/// Appends the 64-bit pattern `bits` to `out` as eight bytes in little-endian order.
fn emit_f64_bits(bits: u64, out: &mut Vec<u8>) {
    let encoded = bits.to_le_bytes();
    out.extend(encoded.iter().copied());
}
|
|
|
|
fn parse_u32_any(s: &str) -> Result<u32, AsmError> {
|
|
let s = s.trim();
|
|
if let Some(rest) = s.strip_prefix("0x") {
|
|
u32::from_str_radix(rest, 16).map_err(|_| AsmError::InvalidOperand(s.into()))
|
|
} else {
|
|
s.parse::<u32>().map_err(|_| AsmError::InvalidOperand(s.into()))
|
|
}
|
|
}
|
|
|
|
fn parse_i32_any(s: &str) -> Result<i32, AsmError> {
|
|
s.trim().parse::<i32>().map_err(|_| AsmError::InvalidOperand(s.into()))
|
|
}
|
|
|
|
fn parse_i64_any(s: &str) -> Result<i64, AsmError> {
|
|
s.trim().parse::<i64>().map_err(|_| AsmError::InvalidOperand(s.into()))
|
|
}
|
|
|
|
fn parse_f64_bits(s: &str) -> Result<u64, AsmError> {
|
|
let s = s.trim();
|
|
let s = s.strip_prefix("f64:").ok_or_else(|| AsmError::InvalidOperand(s.into()))?;
|
|
let hex = s.strip_prefix("0x").ok_or_else(|| AsmError::InvalidOperand(s.into()))?;
|
|
if hex.len() != 16 {
|
|
return Err(AsmError::InvalidOperand(s.into()));
|
|
}
|
|
u64::from_str_radix(hex, 16).map_err(|_| AsmError::InvalidOperand(s.into()))
|
|
}
|
|
|
|
fn parse_keyvals(s: &str) -> Result<(&str, &str), AsmError> {
|
|
// Parses formats like: "fn=123, captures=2" or "fn=3, argc=1"
|
|
let mut parts = s.split(',');
|
|
let a = parts.next().ok_or_else(|| AsmError::MissingOperand(s.into()))?.trim();
|
|
let b = parts.next().ok_or_else(|| AsmError::MissingOperand(s.into()))?.trim();
|
|
if parts.next().is_some() {
|
|
return Err(AsmError::InvalidOperand(s.into()));
|
|
}
|
|
Ok((a, b))
|
|
}
|
|
|
|
fn parse_pair<'a>(a: &'a str, ka: &str, b: &'a str, kb: &str) -> Result<(u32, u32), AsmError> {
|
|
let (ka_l, va_s) = a.split_once('=').ok_or_else(|| AsmError::InvalidOperand(a.into()))?;
|
|
let (kb_l, vb_s) = b.split_once('=').ok_or_else(|| AsmError::InvalidOperand(b.into()))?;
|
|
if ka_l.trim() != ka || kb_l.trim() != kb {
|
|
return Err(AsmError::InvalidOperand(format!("expected keys {} and {}", ka, kb)));
|
|
}
|
|
let va = parse_u32_any(va_s)?;
|
|
let vb = parse_u32_any(vb_s)?;
|
|
Ok((va, vb))
|
|
}
|
|
|
|
/// Splits a line into its mnemonic (the text before the first whitespace) and
/// the remaining operand text, both trimmed.
///
/// When the trimmed line contains no whitespace the operand text is `""`.
fn parse_mnemonic(line: &str) -> (&str, &str) {
    let trimmed = line.trim();
    match trimmed.split_once(char::is_whitespace) {
        Some((mnemonic, operands)) => (mnemonic, operands.trim()),
        None => (trimmed, ""),
    }
}
|
|
|
|
pub fn assemble(src: &str) -> Result<Vec<u8>, AsmError> {
|
|
let mut out = Vec::new();
|
|
for raw_line in src.lines() {
|
|
let line = raw_line.trim();
|
|
if line.is_empty() {
|
|
continue;
|
|
}
|
|
let (mn, ops) = parse_mnemonic(line);
|
|
match mn {
|
|
// Zero-operand
|
|
"NOP" => {
|
|
emit_u16(CoreOpCode::Nop as u16, &mut out);
|
|
}
|
|
"HALT" => {
|
|
emit_u16(CoreOpCode::Halt as u16, &mut out);
|
|
}
|
|
"TRAP" => {
|
|
emit_u16(CoreOpCode::Trap as u16, &mut out);
|
|
}
|
|
"DUP" => {
|
|
emit_u16(CoreOpCode::Dup as u16, &mut out);
|
|
}
|
|
"SWAP" => {
|
|
emit_u16(CoreOpCode::Swap as u16, &mut out);
|
|
}
|
|
"ADD" => {
|
|
emit_u16(CoreOpCode::Add as u16, &mut out);
|
|
}
|
|
"SUB" => {
|
|
emit_u16(CoreOpCode::Sub as u16, &mut out);
|
|
}
|
|
"MUL" => {
|
|
emit_u16(CoreOpCode::Mul as u16, &mut out);
|
|
}
|
|
"DIV" => {
|
|
emit_u16(CoreOpCode::Div as u16, &mut out);
|
|
}
|
|
"MOD" => {
|
|
emit_u16(CoreOpCode::Mod as u16, &mut out);
|
|
}
|
|
"NEG" => {
|
|
emit_u16(CoreOpCode::Neg as u16, &mut out);
|
|
}
|
|
"EQ" => {
|
|
emit_u16(CoreOpCode::Eq as u16, &mut out);
|
|
}
|
|
"NEQ" => {
|
|
emit_u16(CoreOpCode::Neq as u16, &mut out);
|
|
}
|
|
"LT" => {
|
|
emit_u16(CoreOpCode::Lt as u16, &mut out);
|
|
}
|
|
"LTE" => {
|
|
emit_u16(CoreOpCode::Lte as u16, &mut out);
|
|
}
|
|
"GT" => {
|
|
emit_u16(CoreOpCode::Gt as u16, &mut out);
|
|
}
|
|
"GTE" => {
|
|
emit_u16(CoreOpCode::Gte as u16, &mut out);
|
|
}
|
|
"AND" => {
|
|
emit_u16(CoreOpCode::And as u16, &mut out);
|
|
}
|
|
"OR" => {
|
|
emit_u16(CoreOpCode::Or as u16, &mut out);
|
|
}
|
|
"NOT" => {
|
|
emit_u16(CoreOpCode::Not as u16, &mut out);
|
|
}
|
|
"BIT_AND" => {
|
|
emit_u16(CoreOpCode::BitAnd as u16, &mut out);
|
|
}
|
|
"BIT_OR" => {
|
|
emit_u16(CoreOpCode::BitOr as u16, &mut out);
|
|
}
|
|
"BIT_XOR" => {
|
|
emit_u16(CoreOpCode::BitXor as u16, &mut out);
|
|
}
|
|
"SHL" => {
|
|
emit_u16(CoreOpCode::Shl as u16, &mut out);
|
|
}
|
|
"SHR" => {
|
|
emit_u16(CoreOpCode::Shr as u16, &mut out);
|
|
}
|
|
"RET" => {
|
|
emit_u16(CoreOpCode::Ret as u16, &mut out);
|
|
}
|
|
"YIELD" => {
|
|
emit_u16(CoreOpCode::Yield as u16, &mut out);
|
|
}
|
|
"FRAME_SYNC" => {
|
|
emit_u16(CoreOpCode::FrameSync as u16, &mut out);
|
|
}
|
|
|
|
// One u32 immediate (decimal or hex accepted; SYSCALL/HOSTCALL commonly use hex/idx)
|
|
"JMP" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Jmp as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"JMP_IF_FALSE" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::JmpIfFalse as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"JMP_IF_TRUE" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::JmpIfTrue as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"PUSH_CONST" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::PushConst as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"PUSH_I64" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::PushI64 as u16, &mut out);
|
|
emit_i64(parse_i64_any(ops)?, &mut out);
|
|
}
|
|
"PUSH_F64" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::PushF64 as u16, &mut out);
|
|
emit_f64_bits(parse_f64_bits(ops)?, &mut out);
|
|
}
|
|
"PUSH_BOOL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
let v = parse_u32_any(ops)? as u8;
|
|
emit_u16(CoreOpCode::PushBool as u16, &mut out);
|
|
out.push(v);
|
|
}
|
|
"PUSH_I32" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::PushI32 as u16, &mut out);
|
|
emit_i32(parse_i32_any(ops)?, &mut out);
|
|
}
|
|
"POP_N" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::PopN as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"GET_GLOBAL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::GetGlobal as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"SET_GLOBAL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::SetGlobal as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"GET_LOCAL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::GetLocal as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"SET_LOCAL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::SetLocal as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"CALL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Call as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"CALL_CLOSURE" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
let (k, v) =
|
|
ops.split_once('=').ok_or_else(|| AsmError::InvalidOperand(ops.into()))?;
|
|
if k.trim() != "argc" {
|
|
return Err(AsmError::InvalidOperand(ops.into()));
|
|
}
|
|
emit_u16(CoreOpCode::CallClosure as u16, &mut out);
|
|
emit_u32(parse_u32_any(v)?, &mut out);
|
|
}
|
|
"MAKE_CLOSURE" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
let (a, b) = parse_keyvals(ops)?;
|
|
// Accept either order but require exact key names
|
|
let (fn_id, captures) = if a.starts_with("fn=") && b.starts_with("captures=") {
|
|
parse_pair(a, "fn", b, "captures")?
|
|
} else if a.starts_with("captures=") && b.starts_with("fn=") {
|
|
let (cap, fid) = parse_pair(a, "captures", b, "fn")?;
|
|
(fid, cap)
|
|
} else {
|
|
return Err(AsmError::InvalidOperand(ops.into()));
|
|
};
|
|
emit_u16(CoreOpCode::MakeClosure as u16, &mut out);
|
|
emit_u32(fn_id, &mut out);
|
|
emit_u32(captures, &mut out);
|
|
}
|
|
"SPAWN" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
let (a, b) = parse_keyvals(ops)?;
|
|
let (fn_id, argc) = if a.starts_with("fn=") && b.starts_with("argc=") {
|
|
parse_pair(a, "fn", b, "argc")?
|
|
} else if a.starts_with("argc=") && b.starts_with("fn=") {
|
|
let (ac, fid) = parse_pair(a, "argc", b, "fn")?;
|
|
(fid, ac)
|
|
} else {
|
|
return Err(AsmError::InvalidOperand(ops.into()));
|
|
};
|
|
emit_u16(CoreOpCode::Spawn as u16, &mut out);
|
|
emit_u32(fn_id, &mut out);
|
|
emit_u32(argc, &mut out);
|
|
}
|
|
"SLEEP" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Sleep as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"HOSTCALL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Hostcall as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"SYSCALL" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Syscall as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
"INTRINSIC" => {
|
|
if ops.is_empty() {
|
|
return Err(AsmError::MissingOperand(line.into()));
|
|
}
|
|
emit_u16(CoreOpCode::Intrinsic as u16, &mut out);
|
|
emit_u32(parse_u32_any(ops)?, &mut out);
|
|
}
|
|
|
|
other => return Err(AsmError::UnknownMnemonic(other.into())),
|
|
}
|
|
}
|
|
Ok(out)
|
|
}
|