pub mod linker; use crate::abi::SourceSpan; use crate::opcode::OpCode; use serde::{Deserialize, Serialize}; /// An entry in the Constant Pool. /// /// The Constant Pool is a table of unique values used by the program. /// Instead of embedding large data (like strings) directly in the instruction stream, /// the bytecode uses `PushConst ` to load these values onto the stack. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum ConstantPoolEntry { /// Reserved index (0). Represents a null/undefined value. Null, /// A 64-bit integer constant. Int64(i64), /// A 64-bit floating point constant. Float64(f64), /// A boolean constant. Boolean(bool), /// A UTF-8 string constant. String(String), /// A 32-bit integer constant. Int32(i32), } #[derive(Debug, Clone, PartialEq, Eq)] pub enum LoadError { InvalidMagic, InvalidVersion, InvalidEndianness, OverlappingSections, SectionOutOfBounds, InvalidOpcode, InvalidConstIndex, InvalidFunctionIndex, MalformedHeader, MalformedSection, UnexpectedEof, } #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] pub struct FunctionMeta { pub code_offset: u32, pub code_len: u32, pub param_slots: u16, pub local_slots: u16, pub return_slots: u16, pub max_stack_slots: u16, } #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct DebugInfo { pub pc_to_span: Vec<(u32, SourceSpan)>, // Sorted by PC pub function_names: Vec<(u32, String)>, // (func_idx, name) } #[derive(Debug, Clone, PartialEq, Eq)] pub struct Export { pub symbol: String, pub func_idx: u32, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct Import { pub symbol: String, pub relocation_pcs: Vec, } #[derive(Debug, Clone, PartialEq)] pub struct BytecodeModule { pub version: u16, pub const_pool: Vec, pub functions: Vec, pub code: Vec, pub debug_info: Option, pub exports: Vec, pub imports: Vec, } impl BytecodeModule { pub fn serialize(&self) -> Vec { let cp_data = self.serialize_const_pool(); let func_data = self.serialize_functions(); let code_data = self.code.clone(); let debug_data = self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default(); let export_data = self.serialize_exports(); let import_data = self.serialize_imports(); let mut final_sections = Vec::new(); if !cp_data.is_empty() { final_sections.push((0, cp_data)); } if !func_data.is_empty() { final_sections.push((1, func_data)); } if !code_data.is_empty() { final_sections.push((2, code_data)); } if !debug_data.is_empty() { final_sections.push((3, debug_data)); } if !export_data.is_empty() { final_sections.push((4, export_data)); } if !import_data.is_empty() { final_sections.push((5, import_data)); } let mut out = Vec::new(); // Magic "PBS\0" out.extend_from_slice(b"PBS\0"); // Version 0 out.extend_from_slice(&0u16.to_le_bytes()); // Endianness 0 (Little Endian), Reserved out.extend_from_slice(&[0u8, 0u8]); // section_count out.extend_from_slice(&(final_sections.len() as u32).to_le_bytes()); // padding to 32 bytes out.extend_from_slice(&[0u8; 20]); let mut current_offset = 32 + (final_sections.len() as u32 * 12); // Write section table for (kind, data) in &final_sections { let k: u32 = *kind; out.extend_from_slice(&k.to_le_bytes()); out.extend_from_slice(¤t_offset.to_le_bytes()); out.extend_from_slice(&(data.len() as u32).to_le_bytes()); current_offset += data.len() as u32; } // Write section data for (_, data) in final_sections { out.extend_from_slice(&data); } out } fn serialize_const_pool(&self) -> Vec { if self.const_pool.is_empty() { return Vec::new(); } let mut data = Vec::new(); data.extend_from_slice(&(self.const_pool.len() as u32).to_le_bytes()); for entry in &self.const_pool { match entry { ConstantPoolEntry::Null => data.push(0), ConstantPoolEntry::Int64(v) => { data.push(1); data.extend_from_slice(&v.to_le_bytes()); } ConstantPoolEntry::Float64(v) => { data.push(2); data.extend_from_slice(&v.to_le_bytes()); } ConstantPoolEntry::Boolean(v) => { data.push(3); data.push(if *v { 1 } else { 0 }); } ConstantPoolEntry::String(v) => { data.push(4); let s_bytes = v.as_bytes(); data.extend_from_slice(&(s_bytes.len() as u32).to_le_bytes()); data.extend_from_slice(s_bytes); } ConstantPoolEntry::Int32(v) => { data.push(5); data.extend_from_slice(&v.to_le_bytes()); } } } data } fn serialize_functions(&self) -> Vec { if self.functions.is_empty() { return Vec::new(); } let mut data = Vec::new(); data.extend_from_slice(&(self.functions.len() as u32).to_le_bytes()); for f in &self.functions { data.extend_from_slice(&f.code_offset.to_le_bytes()); data.extend_from_slice(&f.code_len.to_le_bytes()); data.extend_from_slice(&f.param_slots.to_le_bytes()); data.extend_from_slice(&f.local_slots.to_le_bytes()); data.extend_from_slice(&f.return_slots.to_le_bytes()); data.extend_from_slice(&f.max_stack_slots.to_le_bytes()); } data } fn serialize_debug(&self, di: &DebugInfo) -> Vec { let mut data = Vec::new(); data.extend_from_slice(&(di.pc_to_span.len() as u32).to_le_bytes()); for (pc, span) in &di.pc_to_span { data.extend_from_slice(&pc.to_le_bytes()); data.extend_from_slice(&span.file_id.to_le_bytes()); data.extend_from_slice(&span.start.to_le_bytes()); data.extend_from_slice(&span.end.to_le_bytes()); } data.extend_from_slice(&(di.function_names.len() as u32).to_le_bytes()); for (idx, name) in &di.function_names { data.extend_from_slice(&idx.to_le_bytes()); let n_bytes = name.as_bytes(); data.extend_from_slice(&(n_bytes.len() as u32).to_le_bytes()); data.extend_from_slice(n_bytes); } data } fn serialize_exports(&self) -> Vec { if self.exports.is_empty() { return Vec::new(); } let mut data = Vec::new(); data.extend_from_slice(&(self.exports.len() as u32).to_le_bytes()); for exp in &self.exports { data.extend_from_slice(&exp.func_idx.to_le_bytes()); let s_bytes = exp.symbol.as_bytes(); data.extend_from_slice(&(s_bytes.len() as u32).to_le_bytes()); data.extend_from_slice(s_bytes); } data } fn serialize_imports(&self) -> Vec { if self.imports.is_empty() { return Vec::new(); } let mut data = Vec::new(); data.extend_from_slice(&(self.imports.len() as u32).to_le_bytes()); for imp in &self.imports { let s_bytes = imp.symbol.as_bytes(); data.extend_from_slice(&(s_bytes.len() as u32).to_le_bytes()); data.extend_from_slice(s_bytes); data.extend_from_slice(&(imp.relocation_pcs.len() as u32).to_le_bytes()); for pc in &imp.relocation_pcs { data.extend_from_slice(&pc.to_le_bytes()); } } data } } pub struct BytecodeLoader; impl BytecodeLoader { pub fn load(bytes: &[u8]) -> Result { if bytes.len() < 32 { return Err(LoadError::UnexpectedEof); } // Magic "PBS\0" if &bytes[0..4] != b"PBS\0" { return Err(LoadError::InvalidMagic); } let version = u16::from_le_bytes([bytes[4], bytes[5]]); if version != 0 { return Err(LoadError::InvalidVersion); } let endianness = bytes[6]; if endianness != 0 { // 0 = Little Endian return Err(LoadError::InvalidEndianness); } let section_count = u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]); let mut sections = Vec::new(); let mut pos = 32; for _ in 0..section_count { if pos + 12 > bytes.len() { return Err(LoadError::UnexpectedEof); } let kind = u32::from_le_bytes([bytes[pos], bytes[pos+1], bytes[pos+2], bytes[pos+3]]); let offset = u32::from_le_bytes([bytes[pos+4], bytes[pos+5], bytes[pos+6], bytes[pos+7]]); let length = u32::from_le_bytes([bytes[pos+8], bytes[pos+9], bytes[pos+10], bytes[pos+11]]); // Basic bounds check if (offset as usize) + (length as usize) > bytes.len() { return Err(LoadError::SectionOutOfBounds); } sections.push((kind, offset, length)); pos += 12; } // Check for overlapping sections for i in 0..sections.len() { for j in i + 1..sections.len() { let (_, o1, l1) = sections[i]; let (_, o2, l2) = sections[j]; if (o1 < o2 + l2) && (o2 < o1 + l1) { return Err(LoadError::OverlappingSections); } } } let mut module = BytecodeModule { version, const_pool: Vec::new(), functions: Vec::new(), code: Vec::new(), debug_info: None, exports: Vec::new(), imports: Vec::new(), }; for (kind, offset, length) in sections { let section_data = &bytes[offset as usize..(offset + length) as usize]; match kind { 0 => { // Const Pool module.const_pool = parse_const_pool(section_data)?; } 1 => { // Functions module.functions = parse_functions(section_data)?; } 2 => { // Code module.code = section_data.to_vec(); } 3 => { // Debug Info module.debug_info = Some(parse_debug_section(section_data)?); } 4 => { // Exports module.exports = parse_exports(section_data)?; } 5 => { // Imports module.imports = parse_imports(section_data)?; } _ => {} // Skip unknown or optional sections } } // Additional validations validate_module(&module)?; Ok(module) } } fn parse_const_pool(data: &[u8]) -> Result, LoadError> { if data.is_empty() { return Ok(Vec::new()); } if data.len() < 4 { return Err(LoadError::MalformedSection); } let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; let mut cp = Vec::with_capacity(count); let mut pos = 4; for _ in 0..count { if pos >= data.len() { return Err(LoadError::UnexpectedEof); } let tag = data[pos]; pos += 1; match tag { 0 => cp.push(ConstantPoolEntry::Null), 1 => { // Int64 if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } let val = i64::from_le_bytes(data[pos..pos+8].try_into().unwrap()); cp.push(ConstantPoolEntry::Int64(val)); pos += 8; } 2 => { // Float64 if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } let val = f64::from_le_bytes(data[pos..pos+8].try_into().unwrap()); cp.push(ConstantPoolEntry::Float64(val)); pos += 8; } 3 => { // Boolean if pos >= data.len() { return Err(LoadError::UnexpectedEof); } cp.push(ConstantPoolEntry::Boolean(data[pos] != 0)); pos += 1; } 4 => { // String if pos + 4 > data.len() { return Err(LoadError::UnexpectedEof); } let len = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; pos += 4; if pos + len > data.len() { return Err(LoadError::UnexpectedEof); } let s = String::from_utf8_lossy(&data[pos..pos+len]).into_owned(); cp.push(ConstantPoolEntry::String(s)); pos += len; } 5 => { // Int32 if pos + 4 > data.len() { return Err(LoadError::UnexpectedEof); } let val = i32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); cp.push(ConstantPoolEntry::Int32(val)); pos += 4; } _ => return Err(LoadError::MalformedSection), } } Ok(cp) } fn parse_functions(data: &[u8]) -> Result, LoadError> { if data.is_empty() { return Ok(Vec::new()); } if data.len() < 4 { return Err(LoadError::MalformedSection); } let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; let mut functions = Vec::with_capacity(count); let mut pos = 4; for _ in 0..count { if pos + 16 > data.len() { return Err(LoadError::UnexpectedEof); } let code_offset = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); let code_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()); let param_slots = u16::from_le_bytes(data[pos+8..pos+10].try_into().unwrap()); let local_slots = u16::from_le_bytes(data[pos+10..pos+12].try_into().unwrap()); let return_slots = u16::from_le_bytes(data[pos+12..pos+14].try_into().unwrap()); let max_stack_slots = u16::from_le_bytes(data[pos+14..pos+16].try_into().unwrap()); functions.push(FunctionMeta { code_offset, code_len, param_slots, local_slots, return_slots, max_stack_slots, }); pos += 16; } Ok(functions) } fn parse_debug_section(data: &[u8]) -> Result { if data.is_empty() { return Ok(DebugInfo::default()); } if data.len() < 8 { return Err(LoadError::MalformedSection); } let mut pos = 0; // PC to Span table let span_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; pos += 4; let mut pc_to_span = Vec::with_capacity(span_count); for _ in 0..span_count { if pos + 16 > data.len() { return Err(LoadError::UnexpectedEof); } let pc = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); let file_id = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()); let start = u32::from_le_bytes(data[pos+8..pos+12].try_into().unwrap()); let end = u32::from_le_bytes(data[pos+12..pos+16].try_into().unwrap()); pc_to_span.push((pc, SourceSpan { file_id, start, end })); pos += 16; } // Function names table if pos + 4 > data.len() { return Err(LoadError::UnexpectedEof); } let func_name_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; pos += 4; let mut function_names = Vec::with_capacity(func_name_count); for _ in 0..func_name_count { if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } let func_idx = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; pos += 8; if pos + name_len > data.len() { return Err(LoadError::UnexpectedEof); } let name = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); function_names.push((func_idx, name)); pos += name_len; } Ok(DebugInfo { pc_to_span, function_names }) } fn parse_exports(data: &[u8]) -> Result, LoadError> { if data.is_empty() { return Ok(Vec::new()); } if data.len() < 4 { return Err(LoadError::MalformedSection); } let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; let mut exports = Vec::with_capacity(count); let mut pos = 4; for _ in 0..count { if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } let func_idx = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; pos += 8; if pos + name_len > data.len() { return Err(LoadError::UnexpectedEof); } let symbol = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); exports.push(Export { symbol, func_idx }); pos += name_len; } Ok(exports) } fn parse_imports(data: &[u8]) -> Result, LoadError> { if data.is_empty() { return Ok(Vec::new()); } if data.len() < 4 { return Err(LoadError::MalformedSection); } let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize; let mut imports = Vec::with_capacity(count); let mut pos = 4; for _ in 0..count { if pos + 8 > data.len() { return Err(LoadError::UnexpectedEof); } let relocation_count = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()) as usize; let name_len = u32::from_le_bytes(data[pos+4..pos+8].try_into().unwrap()) as usize; pos += 8; if pos + name_len > data.len() { return Err(LoadError::UnexpectedEof); } let symbol = String::from_utf8_lossy(&data[pos..pos+name_len]).into_owned(); pos += name_len; if pos + relocation_count * 4 > data.len() { return Err(LoadError::UnexpectedEof); } let mut relocation_pcs = Vec::with_capacity(relocation_count); for _ in 0..relocation_count { let pc = u32::from_le_bytes(data[pos..pos+4].try_into().unwrap()); relocation_pcs.push(pc); pos += 4; } imports.push(Import { symbol, relocation_pcs }); } Ok(imports) } fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> { for func in &module.functions { // Opcode stream bounds if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() { return Err(LoadError::InvalidFunctionIndex); } } // Basic opcode scan for const pool indices let mut pos = 0; while pos < module.code.len() { if pos + 2 > module.code.len() { break; // Unexpected EOF in middle of opcode, maybe should be error } let op_val = u16::from_le_bytes([module.code[pos], module.code[pos+1]]); let opcode = OpCode::try_from(op_val).map_err(|_| LoadError::InvalidOpcode)?; pos += 2; match opcode { OpCode::PushConst => { if pos + 4 > module.code.len() { return Err(LoadError::UnexpectedEof); } let idx = u32::from_le_bytes(module.code[pos..pos+4].try_into().unwrap()) as usize; if idx >= module.const_pool.len() { return Err(LoadError::InvalidConstIndex); } pos += 4; } OpCode::PushI32 | OpCode::PushBounded | OpCode::Jmp | OpCode::JmpIfFalse | OpCode::JmpIfTrue | OpCode::GetGlobal | OpCode::SetGlobal | OpCode::GetLocal | OpCode::SetLocal | OpCode::PopN | OpCode::Syscall | OpCode::GateLoad | OpCode::GateStore => { pos += 4; } OpCode::PushI64 | OpCode::PushF64 => { pos += 8; } OpCode::PushBool => { pos += 1; } OpCode::Call => { pos += 4; } OpCode::Alloc => { pos += 8; } _ => {} } } Ok(()) } #[cfg(test)] mod tests { use super::*; fn create_header(section_count: u32) -> Vec { let mut h = vec![0u8; 32]; h[0..4].copy_from_slice(b"PBS\0"); h[4..6].copy_from_slice(&0u16.to_le_bytes()); // version h[6] = 0; // endianness h[8..12].copy_from_slice(§ion_count.to_le_bytes()); h } #[test] fn test_invalid_magic() { let mut data = create_header(0); data[0] = b'X'; assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidMagic)); } #[test] fn test_invalid_version() { let mut data = create_header(0); data[4] = 1; assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidVersion)); } #[test] fn test_invalid_endianness() { let mut data = create_header(0); data[6] = 1; assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidEndianness)); } #[test] fn test_overlapping_sections() { let mut data = create_header(2); // Section 1: Kind 0, Offset 64, Length 32 data.extend_from_slice(&0u32.to_le_bytes()); data.extend_from_slice(&64u32.to_le_bytes()); data.extend_from_slice(&32u32.to_le_bytes()); // Section 2: Kind 1, Offset 80, Length 32 (Overlaps with Section 1) data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&80u32.to_le_bytes()); data.extend_from_slice(&32u32.to_le_bytes()); // Ensure data is long enough for the offsets data.resize(256, 0); assert_eq!(BytecodeLoader::load(&data), Err(LoadError::OverlappingSections)); } #[test] fn test_section_out_of_bounds() { let mut data = create_header(1); // Section 1: Kind 0, Offset 64, Length 1000 data.extend_from_slice(&0u32.to_le_bytes()); data.extend_from_slice(&64u32.to_le_bytes()); data.extend_from_slice(&1000u32.to_le_bytes()); data.resize(256, 0); assert_eq!(BytecodeLoader::load(&data), Err(LoadError::SectionOutOfBounds)); } #[test] fn test_invalid_function_code_offset() { let mut data = create_header(2); // Section 1: Functions, Kind 1, Offset 64, Length 20 (Header 4 + 1 entry 16) data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&64u32.to_le_bytes()); data.extend_from_slice(&20u32.to_le_bytes()); // Section 2: Code, Kind 2, Offset 128, Length 10 data.extend_from_slice(&2u32.to_le_bytes()); data.extend_from_slice(&128u32.to_le_bytes()); data.extend_from_slice(&10u32.to_le_bytes()); data.resize(256, 0); // Setup functions section let func_data_start = 64; data[func_data_start..func_data_start+4].copy_from_slice(&1u32.to_le_bytes()); // 1 function let entry_start = func_data_start + 4; data[entry_start..entry_start+4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5 data[entry_start+4..entry_start+8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10 // 5 + 10 = 15 > 10 (code section length) assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex)); } #[test] fn test_invalid_const_index() { let mut data = create_header(2); // Section 1: Const Pool, Kind 0, Offset 64, Length 4 (Empty CP) data.extend_from_slice(&0u32.to_le_bytes()); data.extend_from_slice(&64u32.to_le_bytes()); data.extend_from_slice(&4u32.to_le_bytes()); // Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0) data.extend_from_slice(&2u32.to_le_bytes()); data.extend_from_slice(&128u32.to_le_bytes()); data.extend_from_slice(&6u32.to_le_bytes()); data.resize(256, 0); // Setup empty CP data[64..68].copy_from_slice(&0u32.to_le_bytes()); // Setup code with PushConst 0 data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes()); data[130..134].copy_from_slice(&0u32.to_le_bytes()); assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex)); } #[test] fn test_valid_minimal_load() { let data = create_header(0); let module = BytecodeLoader::load(&data).unwrap(); assert_eq!(module.version, 0); assert!(module.const_pool.is_empty()); assert!(module.functions.is_empty()); assert!(module.code.is_empty()); } }