947 lines
32 KiB
Rust
947 lines
32 KiB
Rust
use crate::abi::SourceSpan;
|
|
use crate::opcode::OpCode;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashSet;
|
|
|
|
/// An entry in the Constant Pool.
|
|
///
|
|
/// The Constant Pool is a table of unique values used by the program.
|
|
/// Instead of embedding large data (like strings) directly in the instruction stream,
|
|
/// the bytecode uses `PushConst <index>` to load these values onto the stack.
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
pub enum ConstantPoolEntry {
|
|
/// Reserved index (0). Represents a null/undefined value.
|
|
Null,
|
|
/// A 64-bit integer constant.
|
|
Int64(i64),
|
|
/// A 64-bit floating point constant.
|
|
Float64(f64),
|
|
/// A boolean constant.
|
|
Boolean(bool),
|
|
/// A UTF-8 string constant.
|
|
String(String),
|
|
/// A 32-bit integer constant.
|
|
Int32(i32),
|
|
}
|
|
|
|
/// Reasons a PBS image can be rejected while loading.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// propagated with `?` into `Box<dyn Error>`-style error types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LoadError {
    /// The first four bytes are not `PBS\0`.
    InvalidMagic,
    /// The header version is not 0.
    InvalidVersion,
    /// The header endianness flag is not 0 (little endian).
    InvalidEndianness,
    /// Two entries of the section table cover intersecting byte ranges.
    OverlappingSections,
    /// A section's `offset + length` reaches past the end of the image.
    SectionOutOfBounds,
    /// The code section contains a value that is not a known opcode.
    InvalidOpcode,
    /// A `PushConst` operand is not a valid constant pool index.
    InvalidConstIndex,
    /// A function's code range does not fit inside the code section.
    InvalidFunctionIndex,
    /// The header is structurally invalid.
    MalformedHeader,
    /// A section's contents are structurally invalid (too short for its
    /// count field, unknown constant tag, trailing bytes, ...).
    MalformedSection,
    /// The image has no syscall section.
    MissingSyscallSection,
    /// Two syscall declarations share the same `(module, name, version)`.
    DuplicateSyscallIdentity,
    /// A syscall module or name is not valid UTF-8.
    InvalidUtf8,
    /// The input ended before a complete header, table entry or record.
    UnexpectedEof,
}

impl std::fmt::Display for LoadError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            LoadError::InvalidMagic => "invalid magic bytes",
            LoadError::InvalidVersion => "unsupported format version",
            LoadError::InvalidEndianness => "unsupported endianness",
            LoadError::OverlappingSections => "sections overlap",
            LoadError::SectionOutOfBounds => "section extends past end of image",
            LoadError::InvalidOpcode => "invalid opcode",
            LoadError::InvalidConstIndex => "constant pool index out of range",
            LoadError::InvalidFunctionIndex => "function code range out of bounds",
            LoadError::MalformedHeader => "malformed header",
            LoadError::MalformedSection => "malformed section",
            LoadError::MissingSyscallSection => "missing syscall section",
            LoadError::DuplicateSyscallIdentity => "duplicate syscall identity",
            LoadError::InvalidUtf8 => "invalid UTF-8",
            LoadError::UnexpectedEof => "unexpected end of input",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LoadError {}
|
|
|
|
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
|
|
pub struct FunctionMeta {
|
|
pub code_offset: u32,
|
|
pub code_len: u32,
|
|
pub param_slots: u16,
|
|
pub local_slots: u16,
|
|
pub return_slots: u16,
|
|
pub max_stack_slots: u16,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct DebugInfo {
|
|
pub pc_to_span: Vec<(u32, SourceSpan)>, // Sorted by PC
|
|
pub function_names: Vec<(u32, String)>, // (func_idx, name)
|
|
}
|
|
|
|
/// A symbol exported by the module, bound to a function table index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Export {
    /// Exported symbol name.
    pub symbol: String,
    /// Index of the exported function.
    pub func_idx: u32,
}
|
|
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct SyscallDecl {
|
|
pub module: String,
|
|
pub name: String,
|
|
pub version: u16,
|
|
pub arg_slots: u16,
|
|
pub ret_slots: u16,
|
|
}
|
|
|
|
// Section kind identifiers stored in the `kind` field of each section table entry.

/// Constant pool section.
const SECTION_KIND_CONST_POOL: u32 = 0;
/// Function table section.
const SECTION_KIND_FUNCTIONS: u32 = 1;
/// Raw code section.
const SECTION_KIND_CODE: u32 = 2;
/// Debug info section.
const SECTION_KIND_DEBUG: u32 = 3;
/// Export table section.
const SECTION_KIND_EXPORTS: u32 = 4;
/// Syscall declaration section; the loader requires it to be present.
const SECTION_KIND_SYSCALLS: u32 = 5;
|
|
|
|
/// Represents the final serialized format of a PBS v0 module.
|
|
///
|
|
/// This structure is a pure data container for the PBS format. It does NOT
|
|
/// contain any linker-like logic (symbol resolution, patching, etc.).
|
|
/// All multi-module programs must be flattened and linked by the compiler
|
|
/// before being serialized into this format.
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
pub struct BytecodeModule {
|
|
pub version: u16,
|
|
pub const_pool: Vec<ConstantPoolEntry>,
|
|
pub functions: Vec<FunctionMeta>,
|
|
pub code: Vec<u8>,
|
|
pub debug_info: Option<DebugInfo>,
|
|
pub exports: Vec<Export>,
|
|
pub syscalls: Vec<SyscallDecl>,
|
|
}
|
|
|
|
impl BytecodeModule {
|
|
pub fn serialize(&self) -> Vec<u8> {
|
|
let cp_data = self.serialize_const_pool();
|
|
let func_data = self.serialize_functions();
|
|
let code_data = self.code.clone();
|
|
let debug_data =
|
|
self.debug_info.as_ref().map(|di| self.serialize_debug(di)).unwrap_or_default();
|
|
let export_data = self.serialize_exports();
|
|
let syscall_data = self.serialize_syscalls();
|
|
|
|
let mut final_sections = Vec::new();
|
|
if !cp_data.is_empty() {
|
|
final_sections.push((SECTION_KIND_CONST_POOL, cp_data));
|
|
}
|
|
if !func_data.is_empty() {
|
|
final_sections.push((SECTION_KIND_FUNCTIONS, func_data));
|
|
}
|
|
if !code_data.is_empty() {
|
|
final_sections.push((SECTION_KIND_CODE, code_data));
|
|
}
|
|
if !debug_data.is_empty() {
|
|
final_sections.push((SECTION_KIND_DEBUG, debug_data));
|
|
}
|
|
if !export_data.is_empty() {
|
|
final_sections.push((SECTION_KIND_EXPORTS, export_data));
|
|
}
|
|
final_sections.push((SECTION_KIND_SYSCALLS, syscall_data));
|
|
|
|
let mut out = Vec::new();
|
|
// Magic "PBS\0"
|
|
out.extend_from_slice(b"PBS\0");
|
|
// Version 0
|
|
out.extend_from_slice(&0u16.to_le_bytes());
|
|
// Endianness 0 (Little Endian), Reserved
|
|
out.extend_from_slice(&[0u8, 0u8]);
|
|
// section_count
|
|
out.extend_from_slice(&(final_sections.len() as u32).to_le_bytes());
|
|
// padding to 32 bytes
|
|
out.extend_from_slice(&[0u8; 20]);
|
|
|
|
let mut current_offset = 32 + (final_sections.len() as u32 * 12);
|
|
|
|
// Write section table
|
|
for (kind, data) in &final_sections {
|
|
let k: u32 = *kind;
|
|
out.extend_from_slice(&k.to_le_bytes());
|
|
out.extend_from_slice(¤t_offset.to_le_bytes());
|
|
out.extend_from_slice(&(data.len() as u32).to_le_bytes());
|
|
current_offset += data.len() as u32;
|
|
}
|
|
|
|
// Write section data
|
|
for (_, data) in final_sections {
|
|
out.extend_from_slice(&data);
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
fn serialize_const_pool(&self) -> Vec<u8> {
|
|
if self.const_pool.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
let mut data = Vec::new();
|
|
data.extend_from_slice(&(self.const_pool.len() as u32).to_le_bytes());
|
|
for entry in &self.const_pool {
|
|
match entry {
|
|
ConstantPoolEntry::Null => data.push(0),
|
|
ConstantPoolEntry::Int64(v) => {
|
|
data.push(1);
|
|
data.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
ConstantPoolEntry::Float64(v) => {
|
|
data.push(2);
|
|
data.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
ConstantPoolEntry::Boolean(v) => {
|
|
data.push(3);
|
|
data.push(if *v { 1 } else { 0 });
|
|
}
|
|
ConstantPoolEntry::String(v) => {
|
|
data.push(4);
|
|
let s_bytes = v.as_bytes();
|
|
data.extend_from_slice(&(s_bytes.len() as u32).to_le_bytes());
|
|
data.extend_from_slice(s_bytes);
|
|
}
|
|
ConstantPoolEntry::Int32(v) => {
|
|
data.push(5);
|
|
data.extend_from_slice(&v.to_le_bytes());
|
|
}
|
|
}
|
|
}
|
|
data
|
|
}
|
|
|
|
fn serialize_functions(&self) -> Vec<u8> {
|
|
if self.functions.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
let mut data = Vec::new();
|
|
data.extend_from_slice(&(self.functions.len() as u32).to_le_bytes());
|
|
for f in &self.functions {
|
|
data.extend_from_slice(&f.code_offset.to_le_bytes());
|
|
data.extend_from_slice(&f.code_len.to_le_bytes());
|
|
data.extend_from_slice(&f.param_slots.to_le_bytes());
|
|
data.extend_from_slice(&f.local_slots.to_le_bytes());
|
|
data.extend_from_slice(&f.return_slots.to_le_bytes());
|
|
data.extend_from_slice(&f.max_stack_slots.to_le_bytes());
|
|
}
|
|
data
|
|
}
|
|
|
|
fn serialize_debug(&self, di: &DebugInfo) -> Vec<u8> {
|
|
let mut data = Vec::new();
|
|
data.extend_from_slice(&(di.pc_to_span.len() as u32).to_le_bytes());
|
|
for (pc, span) in &di.pc_to_span {
|
|
data.extend_from_slice(&pc.to_le_bytes());
|
|
data.extend_from_slice(&span.file_id.to_le_bytes());
|
|
data.extend_from_slice(&span.start.to_le_bytes());
|
|
data.extend_from_slice(&span.end.to_le_bytes());
|
|
}
|
|
data.extend_from_slice(&(di.function_names.len() as u32).to_le_bytes());
|
|
for (idx, name) in &di.function_names {
|
|
data.extend_from_slice(&idx.to_le_bytes());
|
|
let n_bytes = name.as_bytes();
|
|
data.extend_from_slice(&(n_bytes.len() as u32).to_le_bytes());
|
|
data.extend_from_slice(n_bytes);
|
|
}
|
|
data
|
|
}
|
|
|
|
fn serialize_exports(&self) -> Vec<u8> {
|
|
if self.exports.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
let mut data = Vec::new();
|
|
data.extend_from_slice(&(self.exports.len() as u32).to_le_bytes());
|
|
for exp in &self.exports {
|
|
data.extend_from_slice(&exp.func_idx.to_le_bytes());
|
|
let s_bytes = exp.symbol.as_bytes();
|
|
data.extend_from_slice(&(s_bytes.len() as u32).to_le_bytes());
|
|
data.extend_from_slice(s_bytes);
|
|
}
|
|
data
|
|
}
|
|
|
|
fn serialize_syscalls(&self) -> Vec<u8> {
|
|
let mut data = Vec::new();
|
|
data.extend_from_slice(&(self.syscalls.len() as u32).to_le_bytes());
|
|
for syscall in &self.syscalls {
|
|
let module = syscall.module.as_bytes();
|
|
let name = syscall.name.as_bytes();
|
|
assert!(u16::try_from(module.len()).is_ok(), "SYSC module name exceeds u16 length");
|
|
assert!(u16::try_from(name.len()).is_ok(), "SYSC syscall name exceeds u16 length");
|
|
|
|
data.extend_from_slice(&(module.len() as u16).to_le_bytes());
|
|
data.extend_from_slice(module);
|
|
data.extend_from_slice(&(name.len() as u16).to_le_bytes());
|
|
data.extend_from_slice(name);
|
|
data.extend_from_slice(&syscall.version.to_le_bytes());
|
|
data.extend_from_slice(&syscall.arg_slots.to_le_bytes());
|
|
data.extend_from_slice(&syscall.ret_slots.to_le_bytes());
|
|
}
|
|
data
|
|
}
|
|
}
|
|
|
|
pub struct BytecodeLoader;
|
|
|
|
impl BytecodeLoader {
|
|
pub fn load(bytes: &[u8]) -> Result<BytecodeModule, LoadError> {
|
|
if bytes.len() < 32 {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
|
|
// Magic "PBS\0"
|
|
if &bytes[0..4] != b"PBS\0" {
|
|
return Err(LoadError::InvalidMagic);
|
|
}
|
|
|
|
let version = u16::from_le_bytes([bytes[4], bytes[5]]);
|
|
if version != 0 {
|
|
return Err(LoadError::InvalidVersion);
|
|
}
|
|
|
|
let endianness = bytes[6];
|
|
if endianness != 0 {
|
|
// 0 = Little Endian
|
|
return Err(LoadError::InvalidEndianness);
|
|
}
|
|
|
|
let section_count = u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]);
|
|
|
|
let mut sections = Vec::new();
|
|
let mut pos = 32;
|
|
for _ in 0..section_count {
|
|
if pos + 12 > bytes.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let kind =
|
|
u32::from_le_bytes([bytes[pos], bytes[pos + 1], bytes[pos + 2], bytes[pos + 3]]);
|
|
let offset = u32::from_le_bytes([
|
|
bytes[pos + 4],
|
|
bytes[pos + 5],
|
|
bytes[pos + 6],
|
|
bytes[pos + 7],
|
|
]);
|
|
let length = u32::from_le_bytes([
|
|
bytes[pos + 8],
|
|
bytes[pos + 9],
|
|
bytes[pos + 10],
|
|
bytes[pos + 11],
|
|
]);
|
|
|
|
// Basic bounds check
|
|
if (offset as usize) + (length as usize) > bytes.len() {
|
|
return Err(LoadError::SectionOutOfBounds);
|
|
}
|
|
|
|
sections.push((kind, offset, length));
|
|
pos += 12;
|
|
}
|
|
|
|
// Check for overlapping sections
|
|
for i in 0..sections.len() {
|
|
for j in i + 1..sections.len() {
|
|
let (_, o1, l1) = sections[i];
|
|
let (_, o2, l2) = sections[j];
|
|
|
|
if (o1 < o2 + l2) && (o2 < o1 + l1) {
|
|
return Err(LoadError::OverlappingSections);
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut module = BytecodeModule {
|
|
version,
|
|
const_pool: Vec::new(),
|
|
functions: Vec::new(),
|
|
code: Vec::new(),
|
|
debug_info: None,
|
|
exports: Vec::new(),
|
|
syscalls: Vec::new(),
|
|
};
|
|
let mut has_syscalls = false;
|
|
|
|
for (kind, offset, length) in sections {
|
|
let section_data = &bytes[offset as usize..(offset + length) as usize];
|
|
match kind {
|
|
SECTION_KIND_CONST_POOL => {
|
|
// Const Pool
|
|
module.const_pool = parse_const_pool(section_data)?;
|
|
}
|
|
SECTION_KIND_FUNCTIONS => {
|
|
// Functions
|
|
module.functions = parse_functions(section_data)?;
|
|
}
|
|
SECTION_KIND_CODE => {
|
|
// Code
|
|
module.code = section_data.to_vec();
|
|
}
|
|
SECTION_KIND_DEBUG => {
|
|
// Debug Info
|
|
module.debug_info = Some(parse_debug_section(section_data)?);
|
|
}
|
|
SECTION_KIND_EXPORTS => {
|
|
// Exports
|
|
module.exports = parse_exports(section_data)?;
|
|
}
|
|
SECTION_KIND_SYSCALLS => {
|
|
module.syscalls = parse_syscalls(section_data)?;
|
|
has_syscalls = true;
|
|
}
|
|
_ => {} // Skip unknown or optional sections
|
|
}
|
|
}
|
|
|
|
if !has_syscalls {
|
|
return Err(LoadError::MissingSyscallSection);
|
|
}
|
|
|
|
// Additional validations
|
|
validate_module(&module)?;
|
|
|
|
Ok(module)
|
|
}
|
|
}
|
|
|
|
fn parse_const_pool(data: &[u8]) -> Result<Vec<ConstantPoolEntry>, LoadError> {
|
|
if data.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
if data.len() < 4 {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
|
let mut cp = Vec::with_capacity(count);
|
|
let mut pos = 4;
|
|
|
|
for _ in 0..count {
|
|
if pos >= data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let tag = data[pos];
|
|
pos += 1;
|
|
match tag {
|
|
0 => cp.push(ConstantPoolEntry::Null),
|
|
1 => {
|
|
// Int64
|
|
if pos + 8 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let val = i64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
|
|
cp.push(ConstantPoolEntry::Int64(val));
|
|
pos += 8;
|
|
}
|
|
2 => {
|
|
// Float64
|
|
if pos + 8 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let val = f64::from_le_bytes(data[pos..pos + 8].try_into().unwrap());
|
|
cp.push(ConstantPoolEntry::Float64(val));
|
|
pos += 8;
|
|
}
|
|
3 => {
|
|
// Boolean
|
|
if pos >= data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
cp.push(ConstantPoolEntry::Boolean(data[pos] != 0));
|
|
pos += 1;
|
|
}
|
|
4 => {
|
|
// String
|
|
if pos + 4 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
|
|
pos += 4;
|
|
if pos + len > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let s = String::from_utf8_lossy(&data[pos..pos + len]).into_owned();
|
|
cp.push(ConstantPoolEntry::String(s));
|
|
pos += len;
|
|
}
|
|
5 => {
|
|
// Int32
|
|
if pos + 4 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let val = i32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
|
|
cp.push(ConstantPoolEntry::Int32(val));
|
|
pos += 4;
|
|
}
|
|
_ => return Err(LoadError::MalformedSection),
|
|
}
|
|
}
|
|
Ok(cp)
|
|
}
|
|
|
|
fn parse_functions(data: &[u8]) -> Result<Vec<FunctionMeta>, LoadError> {
|
|
if data.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
if data.len() < 4 {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
|
let mut functions = Vec::with_capacity(count);
|
|
let mut pos = 4;
|
|
|
|
for _ in 0..count {
|
|
if pos + 16 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let code_offset = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
|
|
let code_len = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
|
|
let param_slots = u16::from_le_bytes(data[pos + 8..pos + 10].try_into().unwrap());
|
|
let local_slots = u16::from_le_bytes(data[pos + 10..pos + 12].try_into().unwrap());
|
|
let return_slots = u16::from_le_bytes(data[pos + 12..pos + 14].try_into().unwrap());
|
|
let max_stack_slots = u16::from_le_bytes(data[pos + 14..pos + 16].try_into().unwrap());
|
|
|
|
functions.push(FunctionMeta {
|
|
code_offset,
|
|
code_len,
|
|
param_slots,
|
|
local_slots,
|
|
return_slots,
|
|
max_stack_slots,
|
|
});
|
|
pos += 16;
|
|
}
|
|
Ok(functions)
|
|
}
|
|
|
|
fn parse_debug_section(data: &[u8]) -> Result<DebugInfo, LoadError> {
|
|
if data.is_empty() {
|
|
return Ok(DebugInfo::default());
|
|
}
|
|
if data.len() < 8 {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
|
|
let mut pos = 0;
|
|
|
|
// PC to Span table
|
|
let span_count = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
|
|
pos += 4;
|
|
let mut pc_to_span = Vec::with_capacity(span_count);
|
|
for _ in 0..span_count {
|
|
if pos + 16 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let pc = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
|
|
let file_id = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap());
|
|
let start = u32::from_le_bytes(data[pos + 8..pos + 12].try_into().unwrap());
|
|
let end = u32::from_le_bytes(data[pos + 12..pos + 16].try_into().unwrap());
|
|
pc_to_span.push((pc, SourceSpan { file_id, start, end }));
|
|
pos += 16;
|
|
}
|
|
|
|
// Function names table
|
|
if pos + 4 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let func_name_count = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
|
|
pos += 4;
|
|
let mut function_names = Vec::with_capacity(func_name_count);
|
|
for _ in 0..func_name_count {
|
|
if pos + 8 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let func_idx = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
|
|
let name_len = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()) as usize;
|
|
pos += 8;
|
|
if pos + name_len > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let name = String::from_utf8_lossy(&data[pos..pos + name_len]).into_owned();
|
|
function_names.push((func_idx, name));
|
|
pos += name_len;
|
|
}
|
|
|
|
Ok(DebugInfo { pc_to_span, function_names })
|
|
}
|
|
|
|
fn parse_exports(data: &[u8]) -> Result<Vec<Export>, LoadError> {
|
|
if data.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
if data.len() < 4 {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
|
let mut exports = Vec::with_capacity(count);
|
|
let mut pos = 4;
|
|
|
|
for _ in 0..count {
|
|
if pos + 8 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let func_idx = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap());
|
|
let name_len = u32::from_le_bytes(data[pos + 4..pos + 8].try_into().unwrap()) as usize;
|
|
pos += 8;
|
|
if pos + name_len > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let symbol = String::from_utf8_lossy(&data[pos..pos + name_len]).into_owned();
|
|
exports.push(Export { symbol, func_idx });
|
|
pos += name_len;
|
|
}
|
|
Ok(exports)
|
|
}
|
|
|
|
fn parse_syscalls(data: &[u8]) -> Result<Vec<SyscallDecl>, LoadError> {
|
|
if data.len() < 4 {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
|
|
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
|
|
let mut syscalls = Vec::with_capacity(count);
|
|
let mut pos = 4;
|
|
|
|
for _ in 0..count {
|
|
if pos + 2 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let module_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
|
|
pos += 2;
|
|
if pos + module_len > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let module = std::str::from_utf8(&data[pos..pos + module_len])
|
|
.map_err(|_| LoadError::InvalidUtf8)?;
|
|
pos += module_len;
|
|
|
|
if pos + 2 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let name_len = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap()) as usize;
|
|
pos += 2;
|
|
if pos + name_len > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let name =
|
|
std::str::from_utf8(&data[pos..pos + name_len]).map_err(|_| LoadError::InvalidUtf8)?;
|
|
pos += name_len;
|
|
|
|
if pos + 6 > data.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let version = u16::from_le_bytes(data[pos..pos + 2].try_into().unwrap());
|
|
let arg_slots = u16::from_le_bytes(data[pos + 2..pos + 4].try_into().unwrap());
|
|
let ret_slots = u16::from_le_bytes(data[pos + 4..pos + 6].try_into().unwrap());
|
|
pos += 6;
|
|
|
|
syscalls.push(SyscallDecl {
|
|
module: module.to_owned(),
|
|
name: name.to_owned(),
|
|
version,
|
|
arg_slots,
|
|
ret_slots,
|
|
});
|
|
}
|
|
|
|
if pos != data.len() {
|
|
return Err(LoadError::MalformedSection);
|
|
}
|
|
|
|
Ok(syscalls)
|
|
}
|
|
|
|
fn validate_module(module: &BytecodeModule) -> Result<(), LoadError> {
|
|
let mut syscall_identities = HashSet::with_capacity(module.syscalls.len());
|
|
for syscall in &module.syscalls {
|
|
if !syscall_identities.insert((
|
|
syscall.module.clone(),
|
|
syscall.name.clone(),
|
|
syscall.version,
|
|
)) {
|
|
return Err(LoadError::DuplicateSyscallIdentity);
|
|
}
|
|
}
|
|
|
|
for func in &module.functions {
|
|
// Opcode stream bounds
|
|
if (func.code_offset as usize) + (func.code_len as usize) > module.code.len() {
|
|
return Err(LoadError::InvalidFunctionIndex);
|
|
}
|
|
}
|
|
|
|
// Basic opcode scan for const pool indices
|
|
let mut pos = 0;
|
|
while pos < module.code.len() {
|
|
if pos + 2 > module.code.len() {
|
|
break; // Unexpected EOF in middle of opcode, maybe should be error
|
|
}
|
|
let op_val = u16::from_le_bytes([module.code[pos], module.code[pos + 1]]);
|
|
let opcode = OpCode::try_from(op_val).map_err(|_| LoadError::InvalidOpcode)?;
|
|
pos += 2;
|
|
|
|
match opcode {
|
|
OpCode::PushConst => {
|
|
if pos + 4 > module.code.len() {
|
|
return Err(LoadError::UnexpectedEof);
|
|
}
|
|
let idx =
|
|
u32::from_le_bytes(module.code[pos..pos + 4].try_into().unwrap()) as usize;
|
|
if idx >= module.const_pool.len() {
|
|
return Err(LoadError::InvalidConstIndex);
|
|
}
|
|
pos += 4;
|
|
}
|
|
OpCode::PushI32
|
|
| OpCode::Jmp
|
|
| OpCode::JmpIfFalse
|
|
| OpCode::JmpIfTrue
|
|
| OpCode::GetGlobal
|
|
| OpCode::SetGlobal
|
|
| OpCode::GetLocal
|
|
| OpCode::SetLocal
|
|
| OpCode::PopN
|
|
| OpCode::Hostcall
|
|
| OpCode::Syscall
|
|
| OpCode::Intrinsic => {
|
|
pos += 4;
|
|
}
|
|
OpCode::PushI64 | OpCode::PushF64 => {
|
|
pos += 8;
|
|
}
|
|
OpCode::PushBool => {
|
|
pos += 1;
|
|
}
|
|
OpCode::Call => {
|
|
pos += 4;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 32-byte PBS v0 header announcing `section_count` sections.
    fn create_header(section_count: u32) -> Vec<u8> {
        let mut h = vec![0u8; 32];
        h[0..4].copy_from_slice(b"PBS\0");
        h[4..6].copy_from_slice(&0u16.to_le_bytes()); // version
        h[6] = 0; // endianness
        h[8..12].copy_from_slice(&section_count.to_le_bytes());
        h
    }

    /// Appends one `(kind, offset, length)` section table entry to `data`.
    fn push_table_entry(data: &mut Vec<u8>, kind: u32, offset: u32, length: u32) {
        data.extend_from_slice(&kind.to_le_bytes());
        data.extend_from_slice(&offset.to_le_bytes());
        data.extend_from_slice(&length.to_le_bytes());
    }

    /// A module with every table empty.
    fn minimal_module() -> BytecodeModule {
        BytecodeModule {
            version: 0,
            const_pool: vec![],
            functions: vec![],
            code: vec![],
            debug_info: None,
            exports: vec![],
            syscalls: vec![],
        }
    }

    /// Assembles a PBS image from raw section payloads, laid out back to back
    /// right after the section table.
    fn build_pbs_with_sections(sections: Vec<(u32, Vec<u8>)>) -> Vec<u8> {
        let mut data = create_header(sections.len() as u32);
        let mut offset = 32 + (sections.len() as u32 * 12);

        for (kind, section_data) in &sections {
            push_table_entry(&mut data, *kind, offset, section_data.len() as u32);
            offset += section_data.len() as u32;
        }

        for (_, section_data) in sections {
            data.extend_from_slice(&section_data);
        }

        data
    }

    #[test]
    fn test_invalid_magic() {
        let mut data = create_header(0);
        data[0] = b'X';
        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidMagic));
    }

    #[test]
    fn test_invalid_version() {
        let mut data = create_header(0);
        data[4] = 1;
        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidVersion));
    }

    #[test]
    fn test_invalid_endianness() {
        let mut data = create_header(0);
        data[6] = 1;
        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidEndianness));
    }

    #[test]
    fn test_overlapping_sections() {
        let mut data = create_header(2);
        // Section 1: Kind 0, Offset 64, Length 32
        push_table_entry(&mut data, 0, 64, 32);
        // Section 2: Kind 1, Offset 80, Length 32 (overlaps with Section 1)
        push_table_entry(&mut data, 1, 80, 32);

        // Ensure data is long enough for the offsets
        data.resize(256, 0);

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::OverlappingSections));
    }

    #[test]
    fn test_section_out_of_bounds() {
        let mut data = create_header(1);
        // Section 1: Kind 0, Offset 64, Length 1000
        push_table_entry(&mut data, 0, 64, 1000);

        data.resize(256, 0);

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::SectionOutOfBounds));
    }

    #[test]
    fn test_invalid_function_code_offset() {
        let mut data = create_header(3);
        // Section 1: Functions, Kind 1, Offset 80, Length 20 (count 4 + 1 entry 16)
        push_table_entry(&mut data, 1, 80, 20);
        // Section 2: Code, Kind 2, Offset 128, Length 10
        push_table_entry(&mut data, 2, 128, 10);
        // Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
        push_table_entry(&mut data, 5, 160, 4);

        data.resize(256, 0);

        // Setup functions section
        let func_data_start = 80;
        data[func_data_start..func_data_start + 4].copy_from_slice(&1u32.to_le_bytes()); // 1 function
        let entry_start = func_data_start + 4;
        data[entry_start..entry_start + 4].copy_from_slice(&5u32.to_le_bytes()); // code_offset = 5
        data[entry_start + 4..entry_start + 8].copy_from_slice(&10u32.to_le_bytes()); // code_len = 10
        // 5 + 10 = 15 > 10 (code section length)
        data[160..164].copy_from_slice(&0u32.to_le_bytes());

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidFunctionIndex));
    }

    #[test]
    fn test_invalid_const_index() {
        let mut data = create_header(3);
        // Section 1: Const Pool, Kind 0, Offset 80, Length 4 (empty CP)
        push_table_entry(&mut data, 0, 80, 4);
        // Section 2: Code, Kind 2, Offset 128, Length 6 (PushConst 0)
        push_table_entry(&mut data, 2, 128, 6);
        // Section 3: SYSC, Kind 5, Offset 160, Length 4 (empty)
        push_table_entry(&mut data, 5, 160, 4);

        data.resize(256, 0);

        // Setup empty CP
        data[80..84].copy_from_slice(&0u32.to_le_bytes());

        // Setup code with PushConst 0
        data[128..130].copy_from_slice(&(OpCode::PushConst as u16).to_le_bytes());
        data[130..134].copy_from_slice(&0u32.to_le_bytes());
        data[160..164].copy_from_slice(&0u32.to_le_bytes());

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidConstIndex));
    }

    #[test]
    fn test_missing_sysc_section_is_rejected() {
        let data = create_header(0);

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MissingSyscallSection));
    }

    #[test]
    fn test_valid_minimal_load_with_empty_sysc() {
        let data = minimal_module().serialize();
        let module = BytecodeLoader::load(&data).unwrap();
        assert_eq!(module.version, 0);
        assert!(module.const_pool.is_empty());
        assert!(module.functions.is_empty());
        assert!(module.code.is_empty());
        assert!(module.syscalls.is_empty());
    }

    #[test]
    fn test_valid_sysc_roundtrip() {
        let mut module = minimal_module();
        module.syscalls = vec![SyscallDecl {
            module: "gfx".into(),
            name: "draw_line".into(),
            version: 1,
            arg_slots: 4,
            ret_slots: 0,
        }];

        let data = module.serialize();
        let loaded = BytecodeLoader::load(&data).unwrap();

        assert_eq!(loaded.syscalls, module.syscalls);
    }

    #[test]
    fn test_malformed_sysc_section_is_rejected() {
        let data = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, vec![1, 0, 0])]);

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::MalformedSection));
    }

    #[test]
    fn test_invalid_utf8_in_sysc_section_is_rejected() {
        let mut sysc = Vec::new();
        sysc.extend_from_slice(&1u32.to_le_bytes()); // one declaration
        sysc.extend_from_slice(&1u16.to_le_bytes()); // module length
        sysc.push(0xFF); // module bytes: not valid UTF-8
        sysc.extend_from_slice(&1u16.to_le_bytes()); // name length
        sysc.push(b'x'); // name bytes
        sysc.extend_from_slice(&1u16.to_le_bytes()); // version
        sysc.extend_from_slice(&0u16.to_le_bytes()); // arg_slots
        sysc.extend_from_slice(&0u16.to_le_bytes()); // ret_slots

        let data = build_pbs_with_sections(vec![(SECTION_KIND_SYSCALLS, sysc)]);

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::InvalidUtf8));
    }

    #[test]
    fn test_duplicate_sysc_identity_is_rejected() {
        let decl = SyscallDecl {
            module: "system".into(),
            name: "has_cart".into(),
            version: 1,
            arg_slots: 0,
            ret_slots: 1,
        };
        let mut module = minimal_module();
        module.syscalls = vec![decl.clone(), decl];

        let data = module.serialize();

        assert_eq!(BytecodeLoader::load(&data), Err(LoadError::DuplicateSyscallIdentity));
    }
}
|